From 5a8b4edb9c978a2cfa5d8dcb86771d4c83103e24 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Fri, 19 Jun 2026 15:04:16 +0800 Subject: [PATCH 1/6] support tunnel Signed-off-by: kerthcet --- Makefile | 10 +- README.md | 42 +- docs/TUNNEL.md | 531 ++++++++++++++++++ examples/tunnel-simple/README.md | 285 ++++++++++ examples/tunnel-simple/docker-compose.yml | 124 ++++ examples/tunnel-simple/headscale-config.yaml | 53 ++ .../docker/Dockerfile.alpine | 0 hack/docker/Dockerfile.daemon-tunnel | 37 ++ hack/docker/Dockerfile.daemon-tunnel-release | 38 ++ .../docker/Dockerfile.debian | 0 .../docker/Dockerfile.rocky | 0 hack/docker/Dockerfile.tunnel | 56 ++ hack/docker/Dockerfile.tunnel-release | 33 ++ hack/docker/README.md | 58 ++ .../docker/docker-compose.e2e.yml | 0 hack/scripts/install.sh | 261 +++++++++ pyproject.toml | 3 +- python/sandd/__init__.py | 3 +- python/sandd/server.py | 47 +- python/tests/test_e2e.py | 2 +- sandd/src/main.rs | 91 +++ server/src/lib.rs | 167 +++++- server/src/server.rs | 2 +- 23 files changed, 1820 insertions(+), 23 deletions(-) create mode 100644 docs/TUNNEL.md create mode 100644 examples/tunnel-simple/README.md create mode 100644 examples/tunnel-simple/docker-compose.yml create mode 100644 examples/tunnel-simple/headscale-config.yaml rename Dockerfile.alpine => hack/docker/Dockerfile.alpine (100%) create mode 100644 hack/docker/Dockerfile.daemon-tunnel create mode 100644 hack/docker/Dockerfile.daemon-tunnel-release rename Dockerfile.debian => hack/docker/Dockerfile.debian (100%) rename Dockerfile.rocky => hack/docker/Dockerfile.rocky (100%) create mode 100644 hack/docker/Dockerfile.tunnel create mode 100644 hack/docker/Dockerfile.tunnel-release create mode 100644 hack/docker/README.md rename docker-compose.e2e.yml => hack/docker/docker-compose.e2e.yml (100%) create mode 100755 hack/scripts/install.sh diff --git a/Makefile b/Makefile index 4c6360f..74fb00c 100644 --- a/Makefile +++ b/Makefile @@ -53,22 +53,22 @@ clean: test-e2e: $(PYTEST) dev 
@echo "Building Docker images..." - docker compose -f docker-compose.e2e.yml build + docker compose -f hack/docker/docker-compose.e2e.yml build @echo "" @echo "Running E2E tests with Docker..." $(PYTEST) python/tests/test_e2e.py -v -s @echo "" @echo "Cleaning up containers..." - docker compose -f docker-compose.e2e.yml down + docker compose -f hack/docker/docker-compose.e2e.yml down docker-build: - docker compose -f docker-compose.e2e.yml build + docker compose -f hack/docker/docker-compose.e2e.yml build docker-up: - docker compose -f docker-compose.e2e.yml up -d + docker compose -f hack/docker/docker-compose.e2e.yml up -d docker-down: - docker compose -f docker-compose.e2e.yml down + docker compose -f hack/docker/docker-compose.e2e.yml down .PHONY: lint lint: $(RUFF) diff --git a/README.md b/README.md index 1636efe..d1436c3 100644 --- a/README.md +++ b/README.md @@ -75,12 +75,24 @@ make install ### Daemon Binary (Worker) -Install from crates.io: +#### Quick Install + +```bash +# Direct mode (no tunnel) +curl -fsSL https://get.sandd.dev/install.sh | sudo bash + +# Tunnel mode (with Tailscale) +curl -fsSL https://get.sandd.dev/install.sh | sudo bash -s -- --tunnel +``` + +#### Alternative Methods + +**Install from crates.io:** ```bash cargo install sandd ``` -Or build from source: +**Build from source:** ```bash git clone https://github.com/InftyAI/SandD cd SandD @@ -90,24 +102,46 @@ make daemon-release ## Quick Start +### Direct Mode (Development) + **Start controller:** ```python from sandd import Server -server = Server("0.0.0.0", 8765) +server = Server() # Direct mode (default) server.wait_for_daemon("worker-1", timeout=30) result = server.exec("worker-1", "hostname") print(result.stdout) ``` -**Start worker:** +**Start daemon:** ```bash +# Direct mode sandd --server-url ws://controller-ip:8765/ws --daemon-id worker-1 + +# Tunnel mode +sandd --server-url ws://10.200.0.1:8765/ws \ + --daemon-id worker-1 \ + --tunnel \ + --tunnel-authkey YOUR_KEY \ + 
--tunnel-server http://headscale:8080 ``` +### Tunnel Mode (Production) + +For secure multi-cloud deployments with mesh VPN: + +```python +from sandd import Server + +server = Server(connect="tunnel") # Secure tunnel mode +``` + +See [Tunnel Mode Guide](./docs/TUNNEL.md) for setup instructions. + ## Documentation - [Quick Start Guide](./docs/QUICKSTART.md) diff --git a/docs/TUNNEL.md b/docs/TUNNEL.md new file mode 100644 index 0000000..30ab0ad --- /dev/null +++ b/docs/TUNNEL.md @@ -0,0 +1,531 @@ +# Tunnel Mode Overview + +SandD supports secure tunnel mode for production deployments using mesh VPN technology. + +## When to Use Tunnel Mode + +**Use tunnel mode when:** +- Deploying across multiple clouds (AWS + GCP + Azure) +- Controller should not be publicly accessible +- Need automatic NAT traversal +- Want network-level isolation + +**Use direct mode when:** +- Single datacenter / trusted network +- Development and testing +- Quick prototyping + +--- + +## Quick Comparison + +| Feature | Direct Mode | Tunnel Mode | +|---------|-------------|-------------| +| Setup | 5 minutes | 15 minutes | +| Controller IP | Public | Private (mesh) | +| NAT traversal | Manual | Automatic | +| Network isolation | ❌ | ✅ | +| Multi-cloud | ⚠️ Needs VPN | ✅ Built-in | + +--- + +## How Tunnel Mode Works + +### The Problem: NAT and Private Networks + +**Why you can't connect directly:** + +``` +Laptop (Controller) Cloud VM (Daemon) +Private: 192.168.1.100 Private: 10.0.1.20 +Behind home router Behind cloud firewall + +❌ Can't reach each other's private IPs +❌ Need to expose public ports (security risk) +❌ Need VPN peering between networks (complex) +``` + +### The Solution: Four Components Working Together + +**Secure mesh requires ALL four pieces:** + +``` +┌────────────────────────────────────────┐ +│ 1. Coordination (Headscale) │ +│ "Who can join? Where are they?" 
│ +│ → Authentication & peer discovery │ +└────────────────────────────────────────┘ + + +┌────────────────────────────────────────┐ +│ 2. NAT Traversal (Hole Punching) │ +│ "How do I reach you behind NAT?" │ +│ → Makes devices reachable │ +└────────────────────────────────────────┘ + + +┌────────────────────────────────────────┐ +│ 3. Encryption (WireGuard) │ +│ "How do I protect the data?" │ +│ → Confidentiality & integrity │ +└────────────────────────────────────────┘ + + +┌────────────────────────────────────────┐ +│ 4. Identity (Cryptographic Keys) │ +│ "How do I verify who you are?" │ +│ → Node authentication │ +└────────────────────────────────────────┘ + = + Secure Mesh Network +``` + +**What each component does:** + +| Component | Problem Solved | Without It | +|-----------|----------------|------------| +| **Headscale** | Who's allowed? Where are peers? | Can't find each other | +| **Hole Punching** | How to reach through NAT? | Can't connect | +| **WireGuard** | How to protect data? | Traffic readable | +| **Keys** | How to verify identity? | Anyone can impersonate | + +### Step-by-Step: How Connection Happens + +**1. Both sides connect OUT to Headscale** + +``` +┌──────────────────────────────┐ +│ Headscale (Public) │ +│ 203.0.113.100:8080 │ +└──────────────────────────────┘ + ↑ ↑ + │ Outbound ✓ │ Outbound ✓ + │ (firewalls allow) │ +┌────┴─────┐ ┌────┴─────┐ +│ Laptop │ │ Cloud VM │ +│ NAT hole │ │ NAT hole │ +│ created │ │ created │ +└──────────┘ └──────────┘ +``` + +**2. Headscale learns each node's "hole"** + +``` +Laptop connects → Headscale sees: 203.0.113.50:60001 +VM connects → Headscale sees: 198.51.100.25:41234 + +Headscale tells each about the other: +→ Laptop: "VM is at 198.51.100.25:41234" +→ VM: "Laptop is at 203.0.113.50:60001" +``` + +**3. 
Nodes punch holes simultaneously** + +``` +Both send packets at same time: +→ Laptop sends to VM's address +→ VM sends to Laptop's address + +NATs see outbound packets, allow replies +Result: Direct encrypted tunnel! ✓ +``` + +**4. WireGuard encrypts all traffic** + +``` +Every packet encrypted with: +- ChaCha20-Poly1305 (cipher) +- Curve25519 (key exchange) +- Authentication tags + +Even if intercepted: unreadable gibberish +``` + +--- + +## Architecture + +### Components + +**Headscale (Server)** +- Coordination server for mesh network +- Runs separately (single instance for entire mesh) +- Issues keys, manages peer discovery + +**Tailscale Client** +- VPN client that connects to Headscale +- Runs in each container (installed via `hack/docker/Dockerfile.tunnel`) +- Joins the mesh, creates tunnel interface + +**Your Application = Controller** +- When you call `Server()`, you ARE the controller +- It starts a WebSocket server that daemons connect to +- In tunnel mode, your app needs Tailscale to join the mesh + +### Direct Mode +``` +Daemon → Internet → Controller (public IP:8765) +``` + +### Tunnel Mode +``` +┌─────────────────────────────────────────────────────────┐ +│ Headscale Server │ +│ (runs once, centrally) │ +└─────────────────────────────────────────────────────────┘ + ↑ ↑ + │ │ +┌──────────▼──────────────────────┐ ┌─────────▼─────────┐ +│ Your Application (Controller) │ │ Daemon │ +│ │ │ (worker) │ +│ Server() starts WebSocket srv │ │ │ +│ (Tailscale client) │ │ (Tailscale client)│ +│ 10.200.0.1 │ │ 10.200.0.2 │ +└─────────────────────────────────┘ └───────────────────┘ + Private Mesh Network + +``` + +**Key:** `hack/docker/Dockerfile.tunnel` installs **Tailscale client** (not Headscale server). Headscale runs separately. 
+ +--- + +## Using Tunnel Mode + +### In Your Application + +```python +from sandd import Server, TunnelConfig + +# Direct mode (default) +server = Server() + +# Tunnel mode +config = TunnelConfig( + authkey="your-headscale-preauth-key", + server="http://headscale:8080" +) +server = Server(connect="tunnel", tunnel_config=config) +``` + +### Docker Image + +Use the tunnel-enabled image. Build it yourself like this: +```bash +docker build -f hack/docker/Dockerfile.tunnel -t my-app:tunnel . +``` + +### Running + +```bash +# Your app code contains TunnelConfig with auth key and server URL +docker run \ + --cap-add NET_ADMIN \ + --device /dev/net/tun \ + my-app:tunnel +``` + +--- + +## Setup Steps + +### 1. Build Tunnel Image + +```bash +# From SandD repo +docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel . +``` + +### 2. Run Headscale + +```bash +docker run -d \ + -p 8080:8080 \ + -v headscale-data:/var/lib/headscale \ + headscale/headscale:latest serve +``` + +### 3. Generate Auth Keys + +```bash +# Create user +docker exec headscale headscale users create sandd + +# Generate keys (save this!) +docker exec headscale headscale preauthkeys create --user sandd --expiration 24h +# Output: key-abc123def456... +``` + +### 4. Write Your Controller App + +```python +# controller.py +from sandd import Server, TunnelConfig +import time + +config = TunnelConfig( + authkey="key-abc123def456", # From step 3 + server="http://headscale:8080" +) + +server = Server(connect="tunnel", tunnel_config=config) +print("Controller ready, waiting for daemons...") + +while True: + daemons = server.list_daemons() + print(f"Connected: {len(daemons)}") + time.sleep(5) +``` + +### 5. 
Run Your Controller + +```bash +docker run \ + --cap-add NET_ADMIN \ + --device /dev/net/tun \ + -v $(pwd)/controller.py:/app/controller.py \ + inftyai/sandd-server:latest-tunnel \ + python /app/controller.py +``` + +--- + +## Complete Example + +See [examples/tunnel-simple/](../examples/tunnel-simple/) for a working docker-compose setup. + +```bash +cd examples/tunnel-simple +docker-compose up +``` + +--- + +## Communication Flow + +### Controller (Your App) +1. Container launches +2. `Server(connect="tunnel", tunnel_config=config)` called +3. Controller automatically starts Tailscale and joins mesh +4. Gets mesh IP (10.200.0.1) +5. WebSocket server starts on 10.200.0.1:8765 + +### Daemon +1. Run with `--tunnel` flag +2. `sandd` automatically starts Tailscale and joins mesh +3. Gets mesh IP (10.200.0.2) +4. Connects to controller at ws://10.200.0.1:8765/ws +5. Ready to execute commands + +**One command:** +```bash +sandd --server-url ws://10.200.0.1:8765/ws \ + --daemon-id worker-1 \ + --tunnel \ + --tunnel-authkey YOUR_KEY \ + --tunnel-server http://headscale:8080 +``` + +## Security Model + +### What's Protected + +**✅ Data in Transit** +- All traffic encrypted with WireGuard +- ChaCha20-Poly1305 cipher (military-grade) +- Perfect forward secrecy + +**✅ Authentication** +- Pre-auth keys control mesh access +- Public key cryptography (Curve25519) +- Each node has unique identity + +**✅ Network Isolation** +- Ports not exposed to internet +- Only mesh nodes can communicate +- Automatic NAT traversal (no manual firewall rules) + +### Key Types and Security + +**1. Auth Key (Pre-Auth Key)** + +```bash +# Single-use (recommended) +headscale preauthkeys create --user sandd --expiration 1h + +# Each node gets unique key +# Expires after first use +``` + +**If leaked:** Attacker can join mesh ❌ + +**Protection:** +- Use single-use keys +- Short expiration (1-24h) +- Rotate regularly +- Never commit to git + +**2. 
WireGuard Private Key** + +**Stored:** `/var/lib/tailscale/tailscaled.state` + +**If leaked:** Attacker can decrypt all traffic to/from that node ❌ + +**Protection:** +```bash +# File permissions +chmod 600 /var/lib/tailscale/tailscaled.state + +# Docker: use named volumes +volumes: + - tailscale-state:/var/lib/tailscale +``` + +**3. Shared Secret** + +**How it works:** Computed from your private key + peer's public key + +**Security:** Never transmitted, only exists in RAM ✓ + +### Comparison with Other Approaches + +| Security Aspect | Plain ws:// | wss:// (TLS) | Tailscale | +|----------------|-------------|--------------|-----------| +| **Encryption** | ❌ None | ✅ TLS 1.3 | ✅ WireGuard | +| **Authentication** | Manual | SSL certs | ✅ Built-in | +| **Port exposure** | ❌ Public | ❌ Public | ✅ Hidden | +| **NAT traversal** | Manual | Manual | ✅ Automatic | +| **Setup complexity** | Simple | Medium (certs) | Medium (Headscale) | +| **Zero-trust** | ❌ | ⚠️ CA-based | ✅ Crypto keys | + +### Attack Scenarios and Mitigations + +**Scenario 1: Auth Key Leaked** + +``` +Impact: Attacker joins mesh, accesses services + +Mitigation: +1. Revoke compromised key + headscale preauthkeys expire --prefix tskey-abc + +2. Remove unauthorized nodes + headscale nodes list + headscale nodes delete --identifier + +3. Generate new keys +4. Update all legitimate nodes +``` + +**Scenario 2: Node Compromised (Root Access)** + +``` +Impact: Attacker steals WireGuard key, decrypts traffic + +Mitigation: +1. Remove node from mesh + headscale nodes delete --identifier + +2. Delete state file on node + rm -rf /var/lib/tailscale/tailscaled.state + +3. Investigate compromise +4. 
Rejoin with new keys +``` + +**Scenario 3: Headscale Server Compromised** + +``` +Impact: +- Can see who's connected (metadata) +- Cannot decrypt traffic (end-to-end encrypted) + +Mitigation: +- Headscale doesn't store private keys +- Data never decrypted at coordinator +- Limit: Can kick nodes off, but can't read data +``` + +### Best Practices + +**Key Management:** +```bash +# ✅ DO: Single-use, short-lived +headscale preauthkeys create --expiration 1h + +# ❌ DON'T: Reusable, long-lived +headscale preauthkeys create --reusable --expiration 8760h +``` + +**Secrets Storage:** +```bash +# ✅ DO: Use secrets management +export KEY=$(vault read -field=key secret/sandd) + +# ❌ DON'T: Hardcode in files +SANDD_TUNNEL_AUTH_KEY=tskey-abc123 # Never commit! +``` + +**Monitoring:** +```bash +# Check for unauthorized nodes +headscale nodes list --output json | \ + jq '.[] | select(.created > "2024-01-01")' +``` + +--- + +## FAQ + +**Q: Is hole punching safe?** +A: Yes. Hole punching only finds the network path. All data is encrypted with WireGuard. Think of it like finding a road (hole punching) vs using an armored truck (encryption). + +**Q: Why not just use WebSocket with TLS?** +A: WebSocket needs a public IP and open ports. Tailscale works when controller is behind NAT (laptop, private cloud) and provides automatic encryption. + +**Q: Can Headscale read my data?** +A: No. Headscale only coordinates connections. Data is encrypted end-to-end between nodes. Headscale never sees decrypted traffic. + +**Q: What if my auth key leaks?** +A: Attacker can join your mesh. Use single-use keys and revoke immediately if leaked. See Security Model section. + +**Q: Why not install Headscale in my container?** +A: Headscale is a coordination server - you only need one for the entire mesh. Like DNS: one server, many clients. + +**Q: What's in `hack/docker/Dockerfile.tunnel`?** +A: Python 3.11, SandD library, and Tailscale client (not Headscale server). 
+ +**Q: Do I need NET_ADMIN?** +A: Yes. VPN requires `--cap-add NET_ADMIN --device /dev/net/tun` + +--- + +## Troubleshooting + +### Check tunnel status + +```bash +# Run from the host; replace <container> with your container name +docker exec <container> tailscale status +docker exec <container> tailscale ip +``` + +### Permission denied + +Ensure container has required capabilities: +```bash +--cap-add NET_ADMIN --device /dev/net/tun +``` + +### Can't reach controller + +Verify mesh IP: +```bash +docker exec controller tailscale ip -4 +# Use this IP in the daemon's --server-url +``` + +--- + +## Next Steps + +- [Detailed Setup Guide](./networking/headscale.md) +- [Configuration Reference](./networking/configuration.md) +- [Kubernetes Deployment](./deployment/kubernetes.md) (coming soon) diff --git a/examples/tunnel-simple/README.md b/examples/tunnel-simple/README.md new file mode 100644 index 0000000..7dbb947 --- /dev/null +++ b/examples/tunnel-simple/README.md @@ -0,0 +1,285 @@ +# Tunnel Mode Example + +⚠️ **For Development/Testing Only** - See [production guide](../../docs/TUNNEL.md) for real deployments. + +This example demonstrates tunnel mode setup with Headscale. It runs Headscale, a controller, and one daemon via docker-compose; additional daemons can be launched separately. 
+ +--- + +## What's Included + +This example provides a **complete working setup**: +- **Headscale** - Coordination server (assigns mesh IPs) +- **Controller** - Your app running `Server()` (WebSocket server) +- **Daemon** - Worker that executes commands +- **Config** - Minimal Headscale configuration + +--- + +## Architecture + +``` +┌──────────────────────────────────────────────┐ +│ Headscale (Coordination Server) │ +│ Image: headscale/headscale:0.23.0 │ +│ Assigns IPs from 100.64.0.0/10 │ +└──────────────────────────────────────────────┘ + ↑ ↑ + │ │ +┌──────────┴─────────────┐ ┌──────┴────────────────┐ +│ Controller (in example) │ │ Daemon (in example) │ +│ │ │ │ +│ Server() in Python │ │ sandd binary │ +│ Tailscale client │ │ Tailscale client │ +│ Mesh IP: 100.64.0.1 │ │ Mesh IP: 100.64.0.2 │ +│ Listens: :8765 │ │ Connects to 100.64.0.1│ +└─────────────────────────┘ └───────────────────────┘ + Private Mesh Network +``` + +--- + +## Quick Start + +### 1. (Optional) Build Images Manually + +You can either: +- **Option A:** Let docker-compose build automatically (recommended for quick start) +- **Option B:** Build images manually first (useful for testing builds) + +```bash +# Option B: Build manually from repo root +docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel . +docker build -f hack/docker/Dockerfile.debian -t inftyai/sandd-daemon:debian . +``` + +**Note:** If you skip this step, docker-compose will build images automatically when you run `docker-compose up`. + +### 2. Start ONLY Headscale (Not App Yet!) + +```bash +cd examples/tunnel-simple + +# Start only Headscale first +docker-compose up -d headscale + +# Wait for it to be ready +sleep 2 +``` + +### 3. Create Headscale User and Auth Key + +```bash +# Create user +docker exec tunnel-simple-headscale-1 headscale users create sandd + +# Generate REUSABLE pre-auth key (SAVE THIS!) 
+# --reusable is required because both controller and daemon use the same key +# Do NOT use a reusable key in production; generate a separate single-use key for each node instead. +docker exec tunnel-simple-headscale-1 headscale preauthkeys create \ + --user sandd \ + --expiration 24h \ + --reusable + +# Example output: +# key-abc123def456... +``` + +### 4. Start Everything with Auth Key + +```bash +# Export the key from step 3 +export SANDD_TUNNEL_AUTH_KEY=key-abc123def456 + +# Start controller + daemon (builds images if not already built) +docker-compose up -d + +# Watch logs +docker-compose logs -f +``` + +**What happens:** +- Controller joins mesh → gets `100.64.0.1` +- Daemon joins mesh → gets `100.64.0.2` +- Daemon connects to controller automatically +- Controller prints each connected daemon's hostname (e.g. `worker-1: <hostname>`) + +### 5. Test the Connection + +```bash +# See all services +docker-compose ps + +# Check controller logs +docker-compose logs app + +# Check daemon logs +docker-compose logs daemon + +# Should see successful connection messages! +``` + +--- + +## Running Additional Daemons + +Want to add more workers? Run manually: + +```bash +# Use the SAME auth key from step 3 +sandd --server-url ws://100.64.0.1:8765/ws \ + --daemon-id worker-2 \ + --tunnel \ + --tunnel-authkey key-abc123def456 \ + --tunnel-server http://localhost:8080 +``` + +**What happens:** +1. Daemon starts Tailscale and joins mesh → gets the next free mesh IP (e.g. `100.64.0.3`) +2. Connects to controller at `ws://100.64.0.1:8765/ws` +3. Controller sees daemon and can send commands + + +--- + +## Configuration + +### Where Do the 100.64.0.x Addresses Come From? + +Open **headscale-config.yaml**: + +```yaml +prefixes: + v4: 100.64.0.0/10 # ← Define mesh IP range here +``` + +When clients join: +- Controller → `100.64.0.1` (first client) +- Daemon 1 → `100.64.0.2` (second client) +- Daemon 2 → `100.64.0.3` (third client) + +**You can change this to any private range** (e.g., `100.64.0.0/16`). 
+ +SandD doesn't hardcode IPs - it queries: `tailscale ip -4` to get the assigned mesh IP. + +--- + +## Running Multiple Daemons + +```bash +# Daemon 1 (gets 100.64.0.2) +sandd --server-url ws://100.64.0.1:8765/ws \ + --daemon-id worker-1 \ + --tunnel \ + --tunnel-authkey key-abc123 \ + --tunnel-server http://localhost:8080 + +# Daemon 2 (gets 100.64.0.3) +sandd --server-url ws://100.64.0.1:8765/ws \ + --daemon-id worker-2 \ + --tunnel \ + --tunnel-authkey key-abc123 \ + --tunnel-server http://localhost:8080 +``` + +**All daemons use the same auth key** (from step 3). + +--- + +## For Your Own App + +### Dockerfile + +```dockerfile +FROM inftyai/sandd-server:latest-tunnel + +COPY my_controller.py . +CMD ["python", "my_controller.py"] +``` + +### Controller Code + +```python +# my_controller.py +from sandd import Server, TunnelConfig +import os +import time + +config = TunnelConfig( + authkey=os.environ["TUNNEL_AUTH_KEY"], + server=os.environ["TUNNEL_SERVER"] +) + +server = Server(connect="tunnel", tunnel_config=config) +print("Controller ready at mesh IP") + +# Your logic +while True: + daemons = server.list_daemons() + print(f"Connected: {len(daemons)} daemons") + + for daemon in daemons: + result = server.exec(daemon.id, "hostname") + print(f"{daemon.id}: {result.stdout.strip()}") + + time.sleep(10) +``` + +### Run + +```bash +docker run \ + --cap-add NET_ADMIN \ + --device /dev/net/tun \ + -e TUNNEL_AUTH_KEY=key-abc123 \ + -e TUNNEL_SERVER=http://headscale:8080 \ + my-controller +``` + +--- + +## Cleanup + +```bash +# Stop services +docker-compose down + +# Remove data +docker-compose down -v +``` + +--- + +## Troubleshooting + +### Controller doesn't get mesh IP + +```bash +# Check Tailscale status inside controller +docker exec tunnel-simple-app-1 tailscale status +docker exec tunnel-simple-app-1 tailscale ip -4 +``` + +### Daemon can't connect + +```bash +# Verify controller mesh IP +docker exec tunnel-simple-app-1 tailscale ip -4 + +# Use that IP in daemon's 
--server-url +sandd --server-url ws://<controller-mesh-ip>:8765/ws ... +``` + +### Check Headscale logs + +```bash +docker logs tunnel-simple-headscale-1 +``` + +--- + +## Next Steps + +- [Full Tunnel Guide](../../docs/TUNNEL.md) +- [Kubernetes Deployment](../../docs/deployment/kubernetes.md) (coming soon) +- [Production Best Practices](../../docs/deployment/production.md) (coming soon) diff --git a/examples/tunnel-simple/docker-compose.yml b/examples/tunnel-simple/docker-compose.yml new file mode 100644 index 0000000..a1ae7f5 --- /dev/null +++ b/examples/tunnel-simple/docker-compose.yml @@ -0,0 +1,124 @@ +version: '3.8' + +# Simple tunnel mode example with Headscale +# +# IMPORTANT: Start services in this order: +# 1. docker-compose up -d headscale +# 2. Generate auth key: docker exec tunnel-simple-headscale-1 headscale preauthkeys create --user sandd --reusable +# 3. export SANDD_TUNNEL_AUTH_KEY=<key-from-step-2> +# 4. docker-compose up -d (starts app and daemon) +# +# See README.md for detailed instructions. + +services: + # Headscale coordination server + headscale: + image: headscale/headscale:0.23 + command: serve + volumes: + - ./headscale-config.yaml:/etc/headscale/config.yaml:ro + - headscale-data:/var/lib/headscale + ports: + - "8080:8080" + - "50443:50443" + networks: + - sandd + environment: + - TZ=UTC + + # Your application using SandD with tunnel + app: + hostname: controller + build: + context: ../.. + dockerfile: hack/docker/Dockerfile.tunnel + command: + - /bin/bash + - -c + - | + python3 << 'PYEOF' + import os + from sandd import Server, TunnelConfig + import time + + config = TunnelConfig( + authkey=os.environ["SANDD_TUNNEL_AUTH_KEY"], + server="http://headscale:8080" + ) + + print("Starting controller in tunnel mode...") + server = Server(connect="tunnel", tunnel_config=config) + print("Controller ready! 
Waiting for daemons...") + + while True: + daemons = server.list_daemons() + for daemon in daemons: + result = server.exec(daemon.id, "hostname") + print(f"{daemon.id}: {result.stdout.strip()}") + + time.sleep(10) + PYEOF + environment: + - SANDD_TUNNEL_AUTH_KEY=${SANDD_TUNNEL_AUTH_KEY:-} + cap_add: + - NET_ADMIN + devices: + - /dev/net/tun + depends_on: + - headscale + networks: + - sandd + + # Daemon worker (connects to controller via mesh) + daemon: + build: + context: ../.. + dockerfile: hack/docker/Dockerfile.daemon-tunnel + entrypoint: ["/bin/bash", "-c"] + command: + - | + set -e + echo "Starting Tailscale daemon..." + + # Start tailscaled in background + tailscaled --tun=userspace-networking --state=/var/lib/tailscale/tailscaled.state & + sleep 3 + + # Join mesh network + echo "Joining mesh network..." + tailscale up \ + --authkey=${SANDD_TUNNEL_AUTH_KEY:-} \ + --login-server=http://headscale:8080 \ + --accept-routes + + # Wait for mesh to stabilize + echo "Waiting for mesh..." + MY_IP=$$(tailscale ip -4) + echo "My mesh IP: $$MY_IP" + sleep 5 + + # Use MagicDNS hostname to connect to controller + CONTROLLER_HOST="controller" + echo "Connecting to controller via MagicDNS: $$CONTROLLER_HOST" + + # Start daemon (MagicDNS will resolve hostname to IP) + exec sandd --server-url=ws://$$CONTROLLER_HOST:8765/ws \ + --daemon-id=worker-1 + environment: + - RUST_LOG=info + cap_add: + - NET_ADMIN + devices: + - /dev/net/tun + depends_on: + - headscale + - app + networks: + - sandd + +volumes: + headscale-data: + +networks: + sandd: + driver: bridge diff --git a/examples/tunnel-simple/headscale-config.yaml b/examples/tunnel-simple/headscale-config.yaml new file mode 100644 index 0000000..291d0ce --- /dev/null +++ b/examples/tunnel-simple/headscale-config.yaml @@ -0,0 +1,53 @@ +# ⚠️ DEVELOPMENT ONLY - NOT FOR PRODUCTION +# +# This is a minimal Headscale configuration for testing SandD tunnel mode. 
+# For production deployments, see: https://headscale.net/ref/configuration/ + +# Server URL that clients connect to +server_url: http://headscale:8080 + +# Listen address +listen_addr: 0.0.0.0:8080 + +# IP range for the mesh network +# Using 100.64.0.0/10 (CGNAT range, RFC 6598) - recommended by Tailscale/Headscale +# This is where 100.64.0.1, 100.64.0.2, etc. are assigned from +prefixes: + v4: 100.64.0.0/10 + v6: fd7a:115c:a1e0::/48 + +# Private keys (auto-generated if not exist) +private_key_path: /var/lib/headscale/private.key +noise: + private_key_path: /var/lib/headscale/noise_private.key + +# Database (SQLite for simplicity) +database: + type: sqlite3 + path: /var/lib/headscale/db.sqlite + +# Logging +log: + level: info + format: text + +# DNS configuration (updated format) +dns: + magic_dns: true + base_domain: sandd.local + nameservers: + global: + - 1.1.1.1 + +# gRPC API for CLI commands (headscale users create, etc.) +grpc_listen_addr: 0.0.0.0:50443 +grpc_allow_insecure: true + +# DERP servers for NAT traversal (uses Tailscale's public servers) +derp: + server: + enabled: false + urls: + - https://controlplane.tailscale.com/derpmap/default + auto_update_enabled: true + update_frequency: 24h diff --git a/Dockerfile.alpine b/hack/docker/Dockerfile.alpine similarity index 100% rename from Dockerfile.alpine rename to hack/docker/Dockerfile.alpine diff --git a/hack/docker/Dockerfile.daemon-tunnel b/hack/docker/Dockerfile.daemon-tunnel new file mode 100644 index 0000000..a3e47b9 --- /dev/null +++ b/hack/docker/Dockerfile.daemon-tunnel @@ -0,0 +1,37 @@ +# SandD Daemon with Tunnel Support (Build from Source) +# Multi-stage build: builds the daemon binary and includes Tailscale + +# Stage 1: Build the daemon binary +FROM rust:bookworm AS builder + +WORKDIR /build +COPY . . 
+ +# Build the daemon +RUN cargo build --release --bin sandd + +# Stage 2: Runtime image with Tailscale +FROM debian:bookworm-slim + +LABEL maintainer="InftyAI " +LABEL description="SandD daemon with Tailscale support" + +# Install Tailscale and dependencies +RUN apt-get update && apt-get install -y \ + curl \ + ca-certificates \ + iptables \ + iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Install Tailscale client +RUN curl -fsSL https://tailscale.com/install.sh | sh + +# Copy daemon binary from builder +COPY --from=builder /build/target/release/sandd /usr/local/bin/sandd +RUN chmod +x /usr/local/bin/sandd + +# Create directory for Tailscale state +RUN mkdir -p /var/lib/tailscale + +ENTRYPOINT ["sandd"] diff --git a/hack/docker/Dockerfile.daemon-tunnel-release b/hack/docker/Dockerfile.daemon-tunnel-release new file mode 100644 index 0000000..7244d83 --- /dev/null +++ b/hack/docker/Dockerfile.daemon-tunnel-release @@ -0,0 +1,38 @@ +# SandD Daemon with Tunnel Support (Release Version) +# Downloads pre-built binary from GitHub releases +# +# Build: docker build -f hack/docker/Dockerfile.daemon-tunnel-release \ +# --build-arg SANDD_VERSION=v0.1.0 \ +# -t inftyai/sandd-daemon:v0.1.0-tunnel . 
+# Run: docker run --cap-add NET_ADMIN --device /dev/net/tun inftyai/sandd-daemon:v0.1.0-tunnel + +FROM debian:bookworm-slim + +LABEL maintainer="InftyAI " +LABEL description="SandD daemon with Tailscale support (release version)" + +# Install Tailscale and dependencies +RUN apt-get update && apt-get install -y \ + curl \ + ca-certificates \ + iptables \ + iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Install Tailscale client +RUN curl -fsSL https://tailscale.com/install.sh | sh + +# Download pre-built daemon binary from GitHub releases +ARG SANDD_VERSION=v0.1.0 +ARG TARGETOS=linux +ARG TARGETARCH=amd64 + +RUN curl -fsSL \ + https://github.com/InftyAI/SandD/releases/download/${SANDD_VERSION}/sandd-${TARGETOS}-${TARGETARCH} \ + -o /usr/local/bin/sandd && \ + chmod +x /usr/local/bin/sandd + +# Create directory for Tailscale state +RUN mkdir -p /var/lib/tailscale + +ENTRYPOINT ["sandd"] diff --git a/Dockerfile.debian b/hack/docker/Dockerfile.debian similarity index 100% rename from Dockerfile.debian rename to hack/docker/Dockerfile.debian diff --git a/Dockerfile.rocky b/hack/docker/Dockerfile.rocky similarity index 100% rename from Dockerfile.rocky rename to hack/docker/Dockerfile.rocky diff --git a/hack/docker/Dockerfile.tunnel b/hack/docker/Dockerfile.tunnel new file mode 100644 index 0000000..4fd8566 --- /dev/null +++ b/hack/docker/Dockerfile.tunnel @@ -0,0 +1,56 @@ +# SandD with Tunnel Support (Build from Source) +# This Dockerfile includes Tailscale client for connecting to Headscale mesh +# +# Architecture: +# Headscale (coordinator server - runs separately) +# ↑ +# └─ Tailscale client (installed in this image) +# +# Build: docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel . 
+# Run: docker run --cap-add NET_ADMIN --device /dev/net/tun inftyai/sandd-server:latest-tunnel + +FROM python:3.11-slim-bookworm + +LABEL maintainer="InftyAI " +LABEL description="SandD with tunnel support via Headscale" + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + ca-certificates \ + iptables \ + iproute2 \ + git \ + build-essential \ + pkg-config \ + libssl-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Tailscale client (connects to Headscale server) +# Note: Headscale server runs separately, this is just the client +RUN curl -fsSL https://tailscale.com/install.sh | sh + +# Install Rust (for building SandD) +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +# Copy SandD source +WORKDIR /build +COPY . . + +# Build and install SandD Python package (from root where pyproject.toml is) +RUN pip install --no-cache-dir maturin && \ + maturin build --release && \ + pip install --no-cache-dir target/wheels/*.whl + +# Cleanup build dependencies +WORKDIR /app +RUN rm -rf /build && \ + apt-get remove -y build-essential && \ + apt-get autoremove -y + +# Create directory for Tailscale state +RUN mkdir -p /var/lib/tailscale + +# Default command +CMD ["python", "-c", "from sandd import Server; print('SandD with tunnel support ready')"] diff --git a/hack/docker/Dockerfile.tunnel-release b/hack/docker/Dockerfile.tunnel-release new file mode 100644 index 0000000..8524d32 --- /dev/null +++ b/hack/docker/Dockerfile.tunnel-release @@ -0,0 +1,33 @@ +# SandD Server with Tunnel Support (Release Version) +# Uses published PyPI package instead of building from source +# +# Build: docker build -f hack/docker/Dockerfile.tunnel-release --build-arg SANDD_VERSION=0.1.0 -t inftyai/sandd-server:v0.1.0-tunnel . 
+# Run: docker run --cap-add NET_ADMIN --device /dev/net/tun inftyai/sandd-server:v0.1.0-tunnel + +FROM python:3.11-slim-bookworm + +LABEL maintainer="InftyAI " +LABEL description="SandD server with tunnel support (release version)" + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + ca-certificates \ + iptables \ + iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Install Tailscale client +RUN curl -fsSL https://tailscale.com/install.sh | sh + +# Install SandD from PyPI (specify version or use latest) +ARG SANDD_VERSION=0.0.0 +RUN pip install --no-cache-dir sandd==${SANDD_VERSION} + +# Create directory for Tailscale state +RUN mkdir -p /var/lib/tailscale + +WORKDIR /app + +# Default command +CMD ["python", "-c", "from sandd import Server; print('SandD server ready')"] diff --git a/hack/docker/README.md b/hack/docker/README.md new file mode 100644 index 0000000..1d48fd3 --- /dev/null +++ b/hack/docker/README.md @@ -0,0 +1,58 @@ +# Docker Files + +This directory contains Docker-related files for building and testing SandD. 
+ +## Files + +### Dockerfiles + +#### Server (Controller) Images + +- **`Dockerfile.tunnel`** - Server with Tailscale (build from source) + - Use: Development and testing + - Build: `docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel .` + - See: [docs/TUNNEL.md](../../docs/TUNNEL.md) + +- **`Dockerfile.tunnel-release`** - Server with Tailscale (uses PyPI release) + - Use: Production deployments + - Build: `docker build -f hack/docker/Dockerfile.tunnel-release --build-arg SANDD_VERSION=0.1.0 -t inftyai/sandd-server:v0.1.0-tunnel .` + +#### Daemon (Worker) Images + +- **`Dockerfile.daemon-tunnel`** - Daemon with Tailscale (build from source) + - Use: Development and testing + - Build: `docker build -f hack/docker/Dockerfile.daemon-tunnel -t inftyai/sandd-daemon:latest-tunnel .` + +- **`Dockerfile.daemon-tunnel-release`** - Daemon with Tailscale (uses GitHub release) + - Use: Production deployments + - Build: `docker build -f hack/docker/Dockerfile.daemon-tunnel-release --build-arg SANDD_VERSION=v0.1.0 -t inftyai/sandd-daemon:v0.1.0-tunnel .` + +#### Test Images (Direct Mode) + +- **`Dockerfile.debian`** - Debian-based daemon (for testing) +- **`Dockerfile.alpine`** - Alpine-based daemon (for testing) +- **`Dockerfile.rocky`** - Rocky Linux-based daemon (for testing) + +### Docker Compose + +- **`docker-compose.e2e.yml`** - End-to-end testing setup + - Runs controller + multiple daemons (Debian, Alpine, Rocky) + - Used by: `python/tests/test_e2e.py` + - Run: `docker compose -f hack/docker/docker-compose.e2e.yml up` + +## Building + +### Build tunnel-enabled image + +```bash +# From repo root +docker build -f hack/docker/Dockerfile.tunnel -t inftyai/sandd-server:latest-tunnel . +``` + +### Build test images + +```bash +docker build -f hack/docker/Dockerfile.debian -t inftyai/sandd-daemon:debian . +docker build -f hack/docker/Dockerfile.alpine -t inftyai/sandd-daemon:alpine . 
+docker build -f hack/docker/Dockerfile.rocky -t inftyai/sandd-daemon:rocky . +``` diff --git a/docker-compose.e2e.yml b/hack/docker/docker-compose.e2e.yml similarity index 100% rename from docker-compose.e2e.yml rename to hack/docker/docker-compose.e2e.yml diff --git a/hack/scripts/install.sh b/hack/scripts/install.sh new file mode 100755 index 0000000..10fa7a8 --- /dev/null +++ b/hack/scripts/install.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# SandD Daemon Installation Script +# +# Usage: +# curl -fsSL https://raw.githubusercontent.com/InftyAI/SandD/main/hack/scripts/install.sh | sudo bash +# curl -fsSL https://raw.githubusercontent.com/InftyAI/SandD/main/hack/scripts/install.sh | sudo bash -s -- --tunnel +# +# Or locally: +# sudo ./hack/scripts/install.sh +# sudo ./hack/scripts/install.sh --tunnel + +set -e + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Configuration +INSTALL_TUNNEL=false +SANDD_VERSION="${SANDD_VERSION:-latest}" +INSTALL_DIR="/usr/local/bin" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --tunnel) + INSTALL_TUNNEL=true + shift + ;; + --version) + SANDD_VERSION="$2" + shift 2 + ;; + --help) + echo "SandD Daemon Installation Script" + echo "" + echo "Usage:" + echo " $0 [options]" + echo "" + echo "Options:" + echo " --tunnel Install with tunnel support (includes Tailscale)" + echo " --version VER Install specific version (default: latest)" + echo " --help Show this help message" + echo "" + echo "Examples:" + echo " $0 # Direct mode only" + echo " $0 --tunnel # With tunnel support" + exit 0 + ;; + *) + echo -e "${RED}Unknown option: $1${NC}" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +# Helper functions +log_info() { + echo -e "${GREEN}==>${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}Warning:${NC} $1" +} + +log_error() { + echo -e "${RED}Error:${NC} $1" +} + +check_root() { + if [[ $EUID -ne 0 ]]; then + log_error "This script must be run as root (use sudo)" 
+ exit 1 + fi +} + 
+detect_os() { + if [[ -f /etc/os-release ]]; then + . /etc/os-release + OS=$ID + OS_VERSION=$VERSION_ID + elif [[ "$(uname)" == "Darwin" ]]; then + OS="macos" + else + log_error "Unsupported operating system" + exit 1 + fi + + ARCH=$(uname -m) + case $ARCH in + x86_64) + ARCH="amd64" + ;; + aarch64|arm64) + ARCH="arm64" + ;; + *) + log_error "Unsupported architecture: $ARCH" + exit 1 + ;; + esac + + log_info "Detected: $OS ($ARCH)" +} + +install_dependencies() { + log_info "Installing dependencies..." + + case $OS in + ubuntu|debian) + apt-get update + apt-get install -y curl ca-certificates + ;; + centos|rhel|rocky|fedora) + yum install -y curl ca-certificates + ;; + macos) + # Assume Homebrew is installed + if ! command -v brew &> /dev/null; then + log_warn "Homebrew not found. Install from https://brew.sh" + fi + ;; + *) + log_warn "Unknown OS, skipping dependency installation" + ;; + esac +} + +install_sandd() { + log_info "Installing SandD daemon..." + + # Check if Rust/Cargo is installed + if command -v cargo &> /dev/null; then + log_info "Installing via cargo..." + cargo install sandd + else + # Download binary + log_info "Downloading binary..." + DOWNLOAD_URL="https://github.com/InftyAI/SandD/releases/download/${SANDD_VERSION}/sandd-${OS}-${ARCH}" + + curl -fsSL "$DOWNLOAD_URL" -o /tmp/sandd + chmod +x /tmp/sandd + mv /tmp/sandd "$INSTALL_DIR/sandd" + + log_info "Installed to $INSTALL_DIR/sandd" + fi +} + +install_tailscale() { + log_info "Installing Tailscale..." 
+ + case $OS in + ubuntu|debian) + curl -fsSL https://tailscale.com/install.sh | sh + ;; + centos|rhel|rocky|fedora) + curl -fsSL https://tailscale.com/install.sh | sh + ;; + macos) + if command -v brew &> /dev/null; then + brew install tailscale + else + log_warn "Please install Tailscale from https://tailscale.com/download" + fi + ;; + *) + log_warn "Please install Tailscale manually from https://tailscale.com/download" + ;; + esac + + # Verify installation + if command -v tailscale &> /dev/null; then + log_info "Tailscale installed: $(tailscale version | head -1)" + else + log_error "Tailscale installation failed" + exit 1 + fi +} + +verify_installation() { + log_info "Verifying installation..." + + if command -v sandd &> /dev/null; then + log_info "✓ sandd installed at: $(command -v sandd)" + else + log_error "sandd installation verification failed" + exit 1 + fi + + if [[ "$INSTALL_TUNNEL" == "true" ]]; then + if command -v tailscale &> /dev/null; then + log_info "✓ Tailscale installed at: $(command -v tailscale)" + else + log_error "Tailscale installation verification failed" + exit 1 + fi + fi +} + +print_next_steps() { + echo "" + echo -e "${GREEN}========================================${NC}" + echo -e "${GREEN}Installation Complete!${NC}" + echo -e "${GREEN}========================================${NC}" + echo "" + + if [[ "$INSTALL_TUNNEL" == "true" ]]; then + echo "Tunnel mode installed." + echo "" + echo "To run the daemon:" + echo "" + echo " sandd --server-url ws://10.200.0.1:8765/ws \\" + echo " --daemon-id worker-1 \\" + echo " --tunnel \\" + echo " --tunnel-authkey YOUR_KEY \\" + echo " --tunnel-server http://headscale:8080" + else + echo "Direct mode installed." 
+ echo "" + echo "To run the daemon:" + echo "" + echo " sandd --server-url ws://controller:8765/ws --daemon-id worker-1" + fi + + echo "" + echo "Documentation: https://github.com/InftyAI/SandD/tree/main/docs" + echo "" +} + +# Main installation flow +main() { + echo "" + echo "SandD Daemon Installer" + echo "======================" + echo "" + + if [[ "$INSTALL_TUNNEL" == "true" ]]; then + log_info "Mode: Tunnel (with Tailscale)" + else + log_info "Mode: Direct" + fi + echo "" + + check_root + detect_os + install_dependencies + # install_sandd + + if [[ "$INSTALL_TUNNEL" == "true" ]]; then + install_tailscale + fi + + verify_installation + print_next_steps +} + +# Run main +main diff --git a/pyproject.toml b/pyproject.toml index 5d95c6c..7ee2c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dev = [ [tool.maturin] module-name = "sandd._core" python-source = "python" +manifest-path = "server/Cargo.toml" features = ["pyo3/extension-module"] include = [ "server/**/*", @@ -39,7 +40,7 @@ include = [ ] exclude = [ "Cargo.toml", - "sandd/**/*", + "/sandd", # Only exclude root sandd/ directory (Rust daemon source) "target/**/*", ".git/**/*", ] diff --git a/python/sandd/__init__.py b/python/sandd/__init__.py index a766202..c238e56 100644 --- a/python/sandd/__init__.py +++ b/python/sandd/__init__.py @@ -44,7 +44,7 @@ from .async_server import AsyncServer try: - from ._core import Session + from ._core import Session, TunnelConfig except ImportError as e: raise ImportError( "Failed to import Rust extension. 
" @@ -58,4 +58,5 @@ "CommandResult", "ServerStats", "DaemonInfo", + "TunnelConfig", ] diff --git a/python/sandd/server.py b/python/sandd/server.py index 2e1f9bb..23c0aa0 100644 --- a/python/sandd/server.py +++ b/python/sandd/server.py @@ -8,7 +8,7 @@ from .models import CommandResult, ServerStats, DaemonInfo try: - from ._core import Server as _RustServer, Session + from ._core import Server as _RustServer, Session, TunnelConfig except ImportError as e: raise ImportError( "Failed to import Rust extension. " @@ -25,21 +25,54 @@ class Server: Args: host: Bind address (default: "0.0.0.0") port: Bind port (default: 8765) + connect: Connection mode (default: "direct") + - "direct": Direct WebSocket connections + - "tunnel": Secure VPN tunnel via Headscale + tunnel_config: Tunnel configuration (required if connect="tunnel") verbose: Enable logging at INFO level (default: True) Set to False to disable logs (useful for interactive sessions) Example: - >>> server = Server("0.0.0.0", 8765) - >>> server.wait_for_daemon("daemon-1", timeout=30) + >>> # Development (direct connection) + >>> server = Server() >>> result = server.exec("daemon-1", "hostname") >>> print(result.stdout) - >>> # Disable logs for clean output, useful for interactive sessions - >>> server = Server("0.0.0.0", 8765, verbose=False) + >>> # Production (secure tunnel) + >>> from sandd import TunnelConfig + >>> config = TunnelConfig( + ... authkey="your-key", + ... server="http://headscale:8080" + ... 
) + >>> server = Server(connect="tunnel", tunnel_config=config) + >>> result = server.exec("daemon-1", "hostname") """ - def __init__(self, host: str = "0.0.0.0", port: int = 8765, verbose: bool = True): - self._server = _RustServer(host, port, verbose) + def __init__( + self, + host: str = "0.0.0.0", + port: int = 8765, + connect: str = "direct", + tunnel_config: Optional[TunnelConfig] = None, + verbose: bool = True + ): + if connect not in ["direct", "tunnel"]: + raise ValueError( + f"connect must be 'direct' or 'tunnel', got '{connect}'" + ) + + if connect == "tunnel" and tunnel_config is None: + raise ValueError( + "tunnel mode requires tunnel_config parameter\n" + "Example: Server(connect='tunnel', tunnel_config=TunnelConfig(authkey='xxx', server='http://headscale:8080'))" + ) + + self._connect = connect + self._tunnel_config = tunnel_config + + # Pass tunnel config object to Rust + self._server = _RustServer(host, port, verbose, connect, tunnel_config) + self._host = host self._port = port diff --git a/python/tests/test_e2e.py b/python/tests/test_e2e.py index 3f73834..8126e35 100644 --- a/python/tests/test_e2e.py +++ b/python/tests/test_e2e.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="module") def docker_daemons(): """Start Docker containers with daemons""" - compose_file = "docker-compose.e2e.yml" + compose_file = "hack/docker/docker-compose.e2e.yml" # Build and start containers subprocess.run( diff --git a/sandd/src/main.rs b/sandd/src/main.rs index 635207d..19a9a4c 100644 --- a/sandd/src/main.rs +++ b/sandd/src/main.rs @@ -39,6 +39,18 @@ struct Args { /// Labels in key=value format (e.g., --label env=prod --label region=us-west) #[arg(short, long = "label", value_name = "KEY=VALUE")] labels: Vec, + + /// Enable tunnel mode (requires Tailscale) + #[arg(long)] + tunnel: bool, + + /// Tunnel auth key (required if --tunnel is set) + #[arg(long)] + tunnel_authkey: Option, + + /// Tunnel control server URL (required if --tunnel is set) + #[arg(long)] + 
tunnel_server: Option, } #[tokio::main] @@ -56,6 +68,7 @@ async fn main() -> Result<()> { // Generate daemon ID if not provided let daemon_id = args .daemon_id + .clone() .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); // Parse labels from key=value format @@ -73,6 +86,12 @@ async fn main() -> Result<()> { info!("Labels: {:?}", labels); } + // Handle tunnel mode + if args.tunnel { + info!("Tunnel mode enabled"); + setup_tunnel(&args).await?; + } + // Main connection loop with reconnection loop { match connect_and_serve( @@ -433,3 +452,75 @@ async fn handle_intree_command(cmd: &str) -> Result { _ => Err(anyhow::anyhow!("Unknown in-tree command: {}", cmd)), } } + +async fn setup_tunnel(args: &Args) -> Result<()> { + use std::process::Command; + + // Validate required arguments + let authkey = args + .tunnel_authkey + .as_ref() + .ok_or_else(|| anyhow::anyhow!("--tunnel requires --tunnel-authkey"))?; + + let server = args + .tunnel_server + .as_ref() + .ok_or_else(|| anyhow::anyhow!("--tunnel requires --tunnel-server"))?; + + // Check if tailscale is installed + let tailscale_check = Command::new("which").arg("tailscale").output(); + + if tailscale_check.is_err() || !tailscale_check.unwrap().status.success() { + return Err(anyhow::anyhow!( + "Tailscale not found. 
Install it first:\n \ + curl -fsSL https://raw.githubusercontent.com/InftyAI/SandD/main/hack/scripts/install.sh | sudo bash -s -- --tunnel" + )); + } + + info!("Starting tailscaled..."); + + // Start tailscaled in background (if not already running) + let _tailscaled = Command::new("tailscaled") + .arg("--tun=userspace-networking") + .arg("--state=/var/lib/tailscale/tailscaled.state") + .spawn(); + + // Give tailscaled time to start + tokio::time::sleep(Duration::from_secs(2)).await; + + info!("Joining mesh network..."); + + // Join mesh + let output = Command::new("tailscale") + .arg("up") + .arg(format!("--authkey={}", authkey)) + .arg(format!("--login-server={}", server)) + .arg("--accept-routes") + .output()?; + + if !output.status.success() { + return Err(anyhow::anyhow!( + "Failed to join mesh: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + // Wait for IP assignment + for _ in 0..30 { + let ip_output = Command::new("tailscale").arg("ip").arg("-4").output(); + + if let Ok(output) = ip_output { + if output.status.success() { + let ip = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if !ip.is_empty() { + info!("✓ Joined mesh network with IP: {}", ip); + return Ok(()); + } + } + } + + tokio::time::sleep(Duration::from_secs(1)).await; + } + + Err(anyhow::anyhow!("Timeout waiting for mesh IP assignment")) +} diff --git a/server/src/lib.rs b/server/src/lib.rs index 7596daa..7d16893 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -21,6 +21,28 @@ use protocol::Message; use registry::DaemonRegistry; use server::SandboxServer; +/// Tunnel configuration +#[pyclass] +#[derive(Clone)] +pub struct TunnelConfig { + #[pyo3(get, set)] + pub authkey: String, + #[pyo3(get, set)] + pub server: String, +} + +#[pymethods] +impl TunnelConfig { + #[new] + fn new(authkey: String, server: String) -> Self { + Self { authkey, server } + } + + fn __repr__(&self) -> String { + format!("TunnelConfig(server={})", self.server) + } +} + /// Python wrapper 
for the Rust server #[pyclass] pub struct Server { @@ -32,8 +54,34 @@ pub struct Server { #[pymethods] impl Server { #[new] - #[pyo3(signature = (host="0.0.0.0".to_string(), port=8765, verbose=true))] - fn new(host: String, port: u16, verbose: bool) -> PyResult { + #[pyo3(signature = ( + host="0.0.0.0".to_string(), + port=8765, + verbose=true, + connect="direct".to_string(), + tunnel_config=None + ))] + fn new( + host: String, + port: u16, + verbose: bool, + connect: String, + tunnel_config: Option, + ) -> PyResult { + // Validate connect parameter + if connect != "direct" && connect != "tunnel" { + return Err(PyValueError::new_err( + format!("connect must be 'direct' or 'tunnel', got '{}'", connect) + )); + } + + // Validate tunnel parameters + if connect == "tunnel" && tunnel_config.is_none() { + return Err(PyValueError::new_err( + "tunnel mode requires tunnel_config parameter" + )); + } + // Initialize logging: INFO by default, unless verbose=False // RUST_LOG env var can override (e.g., RUST_LOG=debug) if verbose { @@ -48,7 +96,31 @@ impl Server { let runtime = Runtime::new() .map_err(|e| PyRuntimeError::new_err(format!("Failed to create runtime: {}", e)))?; - let bind_addr = format!("{}:{}", host, port); + // Handle tunnel mode + let bind_addr = if connect == "tunnel" { + let config = tunnel_config.unwrap(); + + // Setup tunnel + runtime.block_on(async { + setup_tunnel_controller(&config).await + .map_err(|e| PyRuntimeError::new_err(format!("Tunnel setup failed: {}", e))) + })?; + + // Get mesh IP (for logging only) + let mesh_ip = runtime.block_on(async { + get_mesh_ip().await + .map_err(|e| PyRuntimeError::new_err(format!("Failed to get mesh IP: {}", e))) + })?; + + tracing::info!("Controller mesh IP: {} (binding to 0.0.0.0:{})", mesh_ip, port); + + // Bind to 0.0.0.0 instead of mesh IP + // Tailscale will route traffic to this port through the mesh + format!("0.0.0.0:{}", port) + } else { + format!("{}:{}", host, port) + }; + let server = 
SandboxServer::new(bind_addr); let registry = server.registry(); @@ -402,11 +474,100 @@ pub struct PyStats { pub oldest_connection_secs: u64, } +/// Setup tunnel for controller +async fn setup_tunnel_controller(config: &TunnelConfig) -> anyhow::Result<()> { + use std::process::Command; + + // Check if tailscale is installed + let tailscale_check = Command::new("which") + .arg("tailscale") + .output(); + + if tailscale_check.is_err() || !tailscale_check.unwrap().status.success() { + return Err(anyhow::anyhow!( + "Tailscale not found. Install it first:\n \ + curl -fsSL https://tailscale.com/install.sh | sh" + )); + } + + tracing::info!("Starting tailscaled..."); + + // Start tailscaled in background (if not already running) + let _tailscaled = Command::new("tailscaled") + .arg("--tun=userspace-networking") + .arg("--state=/var/lib/tailscale/tailscaled.state") + .spawn(); + + // Give tailscaled time to start + tokio::time::sleep(Duration::from_secs(2)).await; + + tracing::info!("Joining mesh network..."); + + // Join mesh + let output = Command::new("tailscale") + .arg("up") + .arg(format!("--authkey={}", config.authkey)) + .arg(format!("--login-server={}", config.server)) + .arg("--accept-routes") + .output()?; + + if !output.status.success() { + return Err(anyhow::anyhow!( + "Failed to join mesh: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + // Wait for IP assignment + for _ in 0..30 { + let ip_output = Command::new("tailscale") + .arg("ip") + .arg("-4") + .output(); + + if let Ok(output) = ip_output { + if output.status.success() { + let ip = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if !ip.is_empty() { + tracing::info!("✓ Controller joined mesh network with IP: {}", ip); + return Ok(()); + } + } + } + + tokio::time::sleep(Duration::from_secs(1)).await; + } + + Err(anyhow::anyhow!("Timeout waiting for mesh IP assignment")) +} + +/// Get mesh IP address +async fn get_mesh_ip() -> anyhow::Result { + use std::process::Command; + + 
let output = Command::new("tailscale") + .arg("ip") + .arg("-4") + .output()?; + + if !output.status.success() { + return Err(anyhow::anyhow!("Failed to get mesh IP")); + } + + let ip = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if ip.is_empty() { + return Err(anyhow::anyhow!("No mesh IP assigned")); + } + + Ok(ip) +} + /// Python module #[pymodule] fn _core(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/server/src/server.rs b/server/src/server.rs index 89f5f8c..2babef2 100644 --- a/server/src/server.rs +++ b/server/src/server.rs @@ -178,7 +178,7 @@ async fn handle_daemon_message( *daemon_id = Some(id.clone()); info!( - "Daemon {} registered: {} {} {}", + "Daemon {} registered: hostname={} platform={} arch={}", id, metadata.hostname, metadata.platform, metadata.arch ); From 4570ce3cc25d2fcc1ce668a824ca3e1b01f49057 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Fri, 19 Jun 2026 15:13:40 +0800 Subject: [PATCH 2/6] update TUNNEL.md Signed-off-by: kerthcet --- docs/TUNNEL.md | 72 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 9 deletions(-) diff --git a/docs/TUNNEL.md b/docs/TUNNEL.md index 30ab0ad..eb72c52 100644 --- a/docs/TUNNEL.md +++ b/docs/TUNNEL.md @@ -17,15 +17,69 @@ SandD supports secure tunnel mode for production deployments using mesh VPN tech --- -## Quick Comparison - -| Feature | Direct Mode | Tunnel Mode | -|---------|-------------|-------------| -| Setup | 5 minutes | 15 minutes | -| Controller IP | Public | Private (mesh) | -| NAT traversal | Manual | Automatic | -| Network isolation | ❌ | ✅ | -| Multi-cloud | ⚠️ Needs VPN | ✅ Built-in | +## Direct Mode vs Tunnel Mode (VPN) + +### Visual Comparison + +**Direct Mode (No VPN):** +``` +┌──────────┐ ┌──────────┐ +│ Daemon │──── WebSocket over ───→│Controller│ +│ │ public internet │Public IP │ +└──────────┘ └──────────┘ + +- Direct 
WebSocket connection +- No VPN +- Controller needs public IP +- Daemons connect over internet +``` + +**Tunnel Mode (Mesh VPN):** +``` +┌──────────┐ ┌──────────┐ +│ Daemon │════ VPN tunnel ════════│Controller│ +│ Mesh IP │ WireGuard encrypted │ Mesh IP │ +└──────────┘ └──────────┘ + ↓ ↓ + Join VPN Join VPN + ↓ ↓ +┌────────────────────────────────────────────┐ +│ Headscale (VPN coordinator) │ +└────────────────────────────────────────────┘ + +- VPN mesh network +- Encrypted tunnels between nodes +- Private mesh IPs +- No public IPs needed +``` + +### Feature Comparison + +| Feature | Direct Mode | Tunnel Mode (VPN) | +|---------|-------------|-------------------| +| **Setup complexity** | Simple (5 min) | Medium (15 min) | +| **Controller IP** | Must be public | Can be private | +| **Daemon location** | Anywhere (outbound) | Anywhere (mesh) | +| **NAT traversal** | Manual (firewall rules) | Automatic (hole punching) | +| **Encryption** | Need to add TLS | Built-in (WireGuard) | +| **Port exposure** | Public (attack surface) | Hidden (mesh only) | +| **Multi-cloud** | Need VPC peering | Works automatically | +| **Use case** | Single cloud/datacenter | Cross-cloud, laptop↔cloud | + +### When to Use Each + +**Use Direct Mode when:** +- ✅ Controller has stable public IP +- ✅ Single cloud or trusted network +- ✅ Development and testing +- ✅ Simple setup preferred + +**Use Tunnel Mode (VPN) when:** +- ✅ Controller behind NAT (laptop, home, corporate) +- ✅ Multiple clouds (AWS + GCP + Azure) +- ✅ Don't want exposed ports +- ✅ Need encrypted communication +- ✅ Dynamic IPs or ephemeral instances --- From bceda27e9137be0a148ac432df2277f3119a8048 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Fri, 19 Jun 2026 23:24:49 +0800 Subject: [PATCH 3/6] fix test Signed-off-by: kerthcet --- hack/docker/docker-compose.e2e.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hack/docker/docker-compose.e2e.yml b/hack/docker/docker-compose.e2e.yml 
index f7d64ef..1d15025 100644 --- a/hack/docker/docker-compose.e2e.yml +++ b/hack/docker/docker-compose.e2e.yml @@ -4,8 +4,8 @@ services: # Debian-based daemons daemon-debian-1: build: - context: . - dockerfile: Dockerfile.debian + context: ../.. + dockerfile: hack/docker/Dockerfile.debian container_name: sandd-daemon-debian-1 command: > --server-url ws://host.docker.internal:8765/ws @@ -26,8 +26,8 @@ services: daemon-debian-2: build: - context: . - dockerfile: Dockerfile.debian + context: ../.. + dockerfile: hack/docker/Dockerfile.debian container_name: sandd-daemon-debian-2 command: > --server-url ws://host.docker.internal:8765/ws @@ -49,8 +49,8 @@ services: # Alpine-based daemons daemon-alpine-1: build: - context: . - dockerfile: Dockerfile.alpine + context: ../.. + dockerfile: hack/docker/Dockerfile.alpine container_name: sandd-daemon-alpine-1 command: > --server-url ws://host.docker.internal:8765/ws @@ -71,8 +71,8 @@ services: daemon-alpine-2: build: - context: . - dockerfile: Dockerfile.alpine + context: ../.. + dockerfile: hack/docker/Dockerfile.alpine container_name: sandd-daemon-alpine-2 command: > --server-url ws://host.docker.internal:8765/ws @@ -94,8 +94,8 @@ services: # Rocky Linux-based daemons daemon-rocky-1: build: - context: . - dockerfile: Dockerfile.rocky + context: ../.. + dockerfile: hack/docker/Dockerfile.rocky container_name: sandd-daemon-rocky-1 command: > --server-url ws://host.docker.internal:8765/ws @@ -116,8 +116,8 @@ services: daemon-rocky-2: build: - context: . - dockerfile: Dockerfile.rocky + context: ../.. 
+ dockerfile: hack/docker/Dockerfile.rocky container_name: sandd-daemon-rocky-2 command: > --server-url ws://host.docker.internal:8765/ws From 5f9d1759a5b300e4e0f227f0c1449416a5ba7f35 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Fri, 19 Jun 2026 23:27:32 +0800 Subject: [PATCH 4/6] fix install.sh Signed-off-by: kerthcet --- hack/scripts/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/scripts/install.sh b/hack/scripts/install.sh index 10fa7a8..1f8c485 100755 --- a/hack/scripts/install.sh +++ b/hack/scripts/install.sh @@ -247,7 +247,7 @@ main() { check_root detect_os install_dependencies - # install_sandd + install_sandd if [[ "$INSTALL_TUNNEL" == "true" ]]; then install_tailscale From b273d4404f658557df84778a679bc8f4f2e5b848 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sat, 20 Jun 2026 09:10:48 +0800 Subject: [PATCH 5/6] address comments Signed-off-by: kerthcet --- README.md | 7 ++++- python/tests/test_unit.py | 57 ++++++++++++++++++++++++++++++++++++++- sandd/src/main.rs | 14 +++++----- server/src/lib.rs | 49 +++++++++++++++++---------------- 4 files changed, 96 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index d1436c3..05f568d 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,11 @@ For secure multi-cloud deployments with mesh VPN: ```python -from sandd import Server -server = Server(connect="tunnel") # Secure tunnel mode +from sandd import Server, TunnelConfig +config = TunnelConfig( + authkey="YOUR_KEY", + server="http://headscale:8080", +) +server = Server(connect="tunnel", tunnel_config=config) # Secure tunnel mode ``` See [Tunnel Mode Guide](./docs/TUNNEL.md) for setup instructions. 
diff --git a/python/tests/test_unit.py b/python/tests/test_unit.py index 092c0df..bc19434 100644 --- a/python/tests/test_unit.py +++ b/python/tests/test_unit.py @@ -4,7 +4,7 @@ For integration tests with real daemons, see test_integration.py """ import pytest -from sandd import Server, ServerStats +from sandd import Server, ServerStats, TunnelConfig class TestServerAPI: @@ -99,6 +99,61 @@ def test_stats_repr(self): assert "total=" in repr_str +class TestTunnelMode: + """Test tunnel mode validation""" + + def test_invalid_connect_mode(self): + """Test that invalid connect parameter raises ValueError""" + with pytest.raises(ValueError, match="connect must be 'direct' or 'tunnel'"): + Server(connect="invalid") + + def test_tunnel_mode_without_config(self): + """Test that tunnel mode without config raises ValueError""" + with pytest.raises(ValueError, match="tunnel mode requires tunnel_config"): + Server(connect="tunnel") + + def test_tunnel_mode_with_none_config(self): + """Test that tunnel mode with None config raises ValueError""" + with pytest.raises(ValueError, match="tunnel mode requires tunnel_config"): + Server(connect="tunnel", tunnel_config=None) + + def test_direct_mode_explicit(self): + """Test that direct mode can be explicitly specified""" + server = Server(connect="direct") + assert server.address == "0.0.0.0:8765" + + def test_direct_mode_ignores_tunnel_config(self): + """Test that direct mode ignores tunnel_config parameter""" + config = TunnelConfig(authkey="test-key", server="http://test:8080") + server = Server(connect="direct", tunnel_config=config) + assert server.address == "0.0.0.0:8765" + + +class TestTunnelConfig: + """Test TunnelConfig class""" + + def test_create_config(self): + """Test creating TunnelConfig""" + config = TunnelConfig(authkey="test-key", server="http://localhost:8080") + assert config.authkey == "test-key" + assert config.server == "http://localhost:8080" + + def test_config_repr(self): + """Test TunnelConfig string 
representation""" + config = TunnelConfig(authkey="key-123", server="http://headscale:8080") + repr_str = repr(config) + assert "TunnelConfig" in repr_str + assert "http://headscale:8080" in repr_str + + def test_config_mutable(self): + """Test that TunnelConfig attributes can be modified""" + config = TunnelConfig(authkey="old-key", server="http://old:8080") + config.authkey = "new-key" + config.server = "http://new:8080" + assert config.authkey == "new-key" + assert config.server == "http://new:8080" + + class TestErrorHandling: """Test error handling""" diff --git a/sandd/src/main.rs b/sandd/src/main.rs index 19a9a4c..91e0e31 100644 --- a/sandd/src/main.rs +++ b/sandd/src/main.rs @@ -2,7 +2,7 @@ mod executor; mod protocol; mod session; -use anyhow::Result; +use anyhow::{Context, Result}; use clap::Parser; use executor::CommandExecutor; use futures_util::{SinkExt, StreamExt}; @@ -467,10 +467,10 @@ async fn setup_tunnel(args: &Args) -> Result<()> { .as_ref() .ok_or_else(|| anyhow::anyhow!("--tunnel requires --tunnel-server"))?; - // Check if tailscale is installed - let tailscale_check = Command::new("which").arg("tailscale").output(); + // Check if tailscale is installed by trying to run it + let tailscale_check = Command::new("tailscale").arg("version").output(); - if tailscale_check.is_err() || !tailscale_check.unwrap().status.success() { + if tailscale_check.is_err() { return Err(anyhow::anyhow!( "Tailscale not found. 
Install it first:\n \ curl -fsSL https://raw.githubusercontent.com/InftyAI/SandD/main/hack/scripts/install.sh | sudo bash -s -- --tunnel" @@ -483,7 +483,8 @@ async fn setup_tunnel(args: &Args) -> Result<()> { let _tailscaled = Command::new("tailscaled") .arg("--tun=userspace-networking") .arg("--state=/var/lib/tailscale/tailscaled.state") - .spawn(); + .spawn() + .context("Failed to start tailscaled")?; // Give tailscaled time to start tokio::time::sleep(Duration::from_secs(2)).await; @@ -496,7 +497,8 @@ async fn setup_tunnel(args: &Args) -> Result<()> { .arg(format!("--authkey={}", authkey)) .arg(format!("--login-server={}", server)) .arg("--accept-routes") - .output()?; + .output() + .context("Failed to join mesh network")?; if !output.status.success() { return Err(anyhow::anyhow!( diff --git a/server/src/lib.rs b/server/src/lib.rs index 7d16893..22c898a 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -6,6 +6,7 @@ mod protocol; mod registry; mod server; +use anyhow::Context; use pyo3::exceptions::{PyRuntimeError, PyTimeoutError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; @@ -62,23 +63,25 @@ impl Server { tunnel_config=None ))] fn new( + py: Python, host: String, port: u16, verbose: bool, connect: String, - tunnel_config: Option, + tunnel_config: Option>, ) -> PyResult { // Validate connect parameter if connect != "direct" && connect != "tunnel" { - return Err(PyValueError::new_err( - format!("connect must be 'direct' or 'tunnel', got '{}'", connect) - )); + return Err(PyValueError::new_err(format!( + "connect must be 'direct' or 'tunnel', got '{}'", + connect + ))); } // Validate tunnel parameters if connect == "tunnel" && tunnel_config.is_none() { return Err(PyValueError::new_err( - "tunnel mode requires tunnel_config parameter" + "tunnel mode requires tunnel_config parameter", )); } @@ -98,21 +101,28 @@ impl Server { // Handle tunnel mode let bind_addr = if connect == "tunnel" { - let config = tunnel_config.unwrap(); + let 
config_py = tunnel_config.unwrap(); + let config = config_py.borrow(py).clone(); // Setup tunnel runtime.block_on(async { - setup_tunnel_controller(&config).await + setup_tunnel_controller(&config) + .await .map_err(|e| PyRuntimeError::new_err(format!("Tunnel setup failed: {}", e))) })?; // Get mesh IP (for logging only) let mesh_ip = runtime.block_on(async { - get_mesh_ip().await + get_mesh_ip() + .await .map_err(|e| PyRuntimeError::new_err(format!("Failed to get mesh IP: {}", e))) })?; - tracing::info!("Controller mesh IP: {} (binding to 0.0.0.0:{})", mesh_ip, port); + tracing::info!( + "Controller mesh IP: {} (binding to 0.0.0.0:{})", + mesh_ip, + port + ); // Bind to 0.0.0.0 instead of mesh IP // Tailscale will route traffic to this port through the mesh @@ -478,12 +488,10 @@ pub struct PyStats { async fn setup_tunnel_controller(config: &TunnelConfig) -> anyhow::Result<()> { use std::process::Command; - // Check if tailscale is installed - let tailscale_check = Command::new("which") - .arg("tailscale") - .output(); + // Check if tailscale is installed by trying to run it + let tailscale_check = Command::new("tailscale").arg("version").output(); - if tailscale_check.is_err() || !tailscale_check.unwrap().status.success() { + if tailscale_check.is_err() { return Err(anyhow::anyhow!( "Tailscale not found. 
Install it first:\n \ curl -fsSL https://tailscale.com/install.sh | sh" @@ -496,7 +504,8 @@ async fn setup_tunnel_controller(config: &TunnelConfig) -> anyhow::Result<()> { let _tailscaled = Command::new("tailscaled") .arg("--tun=userspace-networking") .arg("--state=/var/lib/tailscale/tailscaled.state") - .spawn(); + .spawn() + .context("Failed to start tailscaled")?; // Give tailscaled time to start tokio::time::sleep(Duration::from_secs(2)).await; @@ -520,10 +529,7 @@ async fn setup_tunnel_controller(config: &TunnelConfig) -> anyhow::Result<()> { // Wait for IP assignment for _ in 0..30 { - let ip_output = Command::new("tailscale") - .arg("ip") - .arg("-4") - .output(); + let ip_output = Command::new("tailscale").arg("ip").arg("-4").output(); if let Ok(output) = ip_output { if output.status.success() { @@ -545,10 +551,7 @@ async fn setup_tunnel_controller(config: &TunnelConfig) -> anyhow::Result<()> { async fn get_mesh_ip() -> anyhow::Result { use std::process::Command; - let output = Command::new("tailscale") - .arg("ip") - .arg("-4") - .output()?; + let output = Command::new("tailscale").arg("ip").arg("-4").output()?; if !output.status.success() { return Err(anyhow::anyhow!("Failed to get mesh IP")); From 50ca52f875ad83b6cf9badbd23585cf674958466 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sat, 20 Jun 2026 09:15:43 +0800 Subject: [PATCH 6/6] separate the e2e tests Signed-off-by: kerthcet --- .github/workflows/rust-ci.yaml | 27 +++++++++++++++++++++++++++ Makefile | 10 +++++----- pyproject.toml | 3 +++ python/tests/test_e2e.py | 5 +++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/.github/workflows/rust-ci.yaml b/.github/workflows/rust-ci.yaml index 736f2f6..76a81a5 100644 --- a/.github/workflows/rust-ci.yaml +++ b/.github/workflows/rust-ci.yaml @@ -36,5 +36,32 @@ jobs: with: toolchain: stable + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Run tests run: make test + + test-e2e: + name: E2E 
Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Run E2E tests + run: make test-e2e diff --git a/Makefile b/Makefile index 74fb00c..32bbea6 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ help: @echo " make build - Build Python package (debug mode)" @echo " make install - Install Python package locally" @echo " make dev - Install in development mode with hot reload" - @echo " make test - Run unit and integration tests" - @echo " make test-e2e - Run end-to-end tests with Docker" + @echo " make test - Run unit and integration tests (fast, no Docker)" + @echo " make test-e2e - Run end-to-end tests with Docker (slow)" @echo " make daemon-build - Build daemon binary (debug)" @echo " make daemon-release - Build daemon binary (release)" @echo " make docker-build - Build Docker image for daemon" @@ -34,8 +34,8 @@ test: lint $(PYTEST) dev @echo "Running Rust tests (server protocol)..." cargo test --package sandbox-server --lib @echo "" - @echo "Running Python tests..." - $(PYTEST) python/tests/ + @echo "Running Python tests (excluding e2e)..." + $(PYTEST) python/tests/ -m "not e2e" daemon-build: cargo build --package sandd @@ -56,7 +56,7 @@ test-e2e: $(PYTEST) dev docker compose -f hack/docker/docker-compose.e2e.yml build @echo "" @echo "Running E2E tests with Docker..." - $(PYTEST) python/tests/test_e2e.py -v -s + $(PYTEST) python/tests/ -m e2e -v -s @echo "" @echo "Cleaning up containers..." 
docker compose -f hack/docker/docker-compose.e2e.yml down diff --git a/pyproject.toml b/pyproject.toml index 7ee2c10..3c3c772 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,3 +47,6 @@ exclude = [ [tool.pytest.ini_options] asyncio_mode = "auto" +markers = [ + "e2e: end-to-end tests with Docker (slow, skip by default)", +] diff --git a/python/tests/test_e2e.py b/python/tests/test_e2e.py index 8126e35..98c5da6 100644 --- a/python/tests/test_e2e.py +++ b/python/tests/test_e2e.py @@ -1,12 +1,17 @@ """End-to-end tests with Docker containers Run with: make test-e2e + +These tests are marked as 'e2e' and skipped by default in 'make test'. +Use 'make test-e2e' to run them explicitly. """ import pytest import time import subprocess from sandd import Server +pytestmark = pytest.mark.e2e + @pytest.fixture(scope="module") def docker_daemons():