--- /dev/null
+# ---- Telegram ----
+# Token from @BotFather:
+TELEGRAM_BOT_TOKEN=
+
+# Your numeric Telegram user id. Leave blank the first time: message the bot,
+# it will reply with your id, then paste it here and restart.
+AUTHORIZED_USER_ID=
+
+# ---- ollama / models ----
+OLLAMA_HOST=http://127.0.0.1:11434
+LLM_MODEL=qwen2.5:3b
+EMBED_MODEL=nomic-embed-text
+USE_EMBEDDINGS=true
+
+# ---- whisper.cpp ----
+# Absolute paths to the built binary and the model. The values below are the
+# Raspberry Pi defaults; scripts/setup-rpi.sh rewrites WHISPER_MODEL to match
+# the model it actually built.
+WHISPER_BIN=/opt/segismundo/whisper.cpp/build/bin/whisper-cli
+WHISPER_MODEL=/opt/segismundo/whisper.cpp/models/ggml-small.bin
+WHISPER_LANG=es
+
+# ---- storage ----
+DATA_DIR=/var/lib/segismundo
+DB_PATH=/var/lib/segismundo/segismundo.db
+
+# ---- HTTP morning endpoint ----
+HTTP_HOST=0.0.0.0
+HTTP_PORT=8081
+# Shared secret the iPhone automation must send (Authorization: Bearer <token>).
+# Generate one with: openssl rand -hex 16
+MORNING_TOKEN=
+
+# ---- contexts / locations ----
+# Comma-separated named contexts. A note/reminder mentioning one of these words
+# is filed under it; everything else is "general". Then you can ask "¿qué
+# recordatorios tengo en Madrid?" or use /recordatorios madrid.
+CONTEXTS=burgos,madrid
+
+# ---- misc ----
+TZ=Europe/Madrid
+REMINDER_POLL_SECONDS=30
+LOG_LEVEL=INFO
--- /dev/null
+# Local test overrides for this Ubuntu machine.
+# Copy to .env.local and fill in the token. The local scripts point paths at the
+# repo's ./.runtime directory so nothing touches /opt or /var.
+#
+# cp .env.local.example .env.local
+# ./scripts/setup-local.sh
+# ./scripts/run-local.sh
+
+TELEGRAM_BOT_TOKEN=
+AUTHORIZED_USER_ID=
+
+OLLAMA_HOST=http://127.0.0.1:11434
+LLM_MODEL=qwen2.5:3b
+EMBED_MODEL=nomic-embed-text
+USE_EMBEDDINGS=true
+
+# Where scripts/setup-local.sh builds whisper.cpp (inside the repo, gitignored).
+WHISPER_BIN=./.runtime/whisper.cpp/build/bin/whisper-cli
+WHISPER_MODEL=./.runtime/whisper.cpp/models/ggml-small.bin
+WHISPER_LANG=es
+
+DATA_DIR=./.runtime/data
+DB_PATH=./.runtime/data/segismundo.db
+
+HTTP_HOST=127.0.0.1
+HTTP_PORT=8081
+MORNING_TOKEN=local-test-token
+
+CONTEXTS=burgos,madrid
+TZ=Europe/Madrid
+REMINDER_POLL_SECONDS=10
+LOG_LEVEL=INFO
--- /dev/null
+# Secrets & local config
+.env
+.env.local
+
+# Local runtime (venv, whisper build, sqlite db, downloaded models)
+.runtime/
+.venv/
+venv/
+
+# Python
+__pycache__/
+*.pyc
+*.egg-info/
+.pytest_cache/
--- /dev/null
+# Segismundo 🤵
+
+A personal **AI butler** that lives on your Raspberry Pi, talks to you over
+Telegram, transcribes your voice notes, understands them with a **local** LLM,
+keeps your notes and reminders in a searchable database, and gives you a morning
+briefing on demand.
+
+Everything runs **on-device**: [ollama](https://ollama.com) for the LLM and
+embeddings, [whisper.cpp](https://github.com/ggml-org/whisper.cpp) for speech-to-text.
+No cloud, no API keys beyond your Telegram bot token. Built and tested in Spanish.
+
+---
+
+## What it does
+
+| You send… | Segismundo… |
+|-----------|-------------|
+| 🎙️ a voice note | transcribes it (Whisper) and acts on it |
+| “apunta que la wifi del hotel es Familia2024” | saves a **note** |
+| “recuérdame mañana a las 9 llamar al fontanero” | schedules a **reminder** and pings you when it's due |
+| “¿cuál era la contraseña del wifi?” | **searches** your notes (keyword + semantic) and answers |
+| “¿qué grabé ayer / la semana pasada?” | **recalls** notes by date range |
+| “recuérdame … en Madrid”, “¿qué tengo en Burgos?” | files & filters by **context** |
+| “mueve la nota 3 a Burgos”, `/contexto nota 3 madrid` | **re-files** to another context |
+| “¿estás funcionando?” | holds a short **conversation** |
+| `/temas`, `/tema jardin` | browses notes by **auto-tag** topic |
+| `/exportar` | sends you **all your notes** as a Markdown file |
+| “añade a la nota 3 …”, “cambia el recordatorio 2 a las 10” | **edits / appends** |
+| “borra el recordatorio 1”, `/borrar nota 3` | **deletes** (soft, recoverable) |
+| “deshaz” / “me he equivocado” / `/deshacer` | **undoes** the last operation |
+| (when a reminder fires) | **✅ Hecho / ⏰ +1h / 🗑️** inline buttons |
+| `/recordatorios`, `/notas`, `/buscar`, `/resumen`, `/hecho <id>` | runs **commands** |
+| an HTTP GET to `/morning` (from your iPhone) | returns a **briefing** and pushes it to Telegram |
+
+It only ever answers **you** — every message is checked against your Telegram user id.
+
+---
+
+## Architecture
+
+```
+ ┌────────────────────── Raspberry Pi 5 (rpi5, Tailscale) ──────────────────────┐
+ │ │
+ Telegram ◄──long poll──► bot.py ──► core.py (Butler) ──┬──► db.py SQLite (FTS5 + vectors) │
+ (you only) │ ├──► llm.py ──► ollama (qwen2.5:3b) │
+ ▲ │ │ + nomic-embed-text │
+ │ ├──► transcribe.py ──► ffmpeg + whisper.cpp (ggml-small, es) │
+ │ │ │
+ │ ├──► reminders.py (asyncio loop) ──► pushes due reminders to you │
+ └──── push ◄───────────┤ │
+ └──► http_api.py GET /morning ◄──── iPhone morning automation │
+ │ │
+ └───────────────────────────────────────────────────────────────────────────────┘
+```
+
+The **Butler** (`core.py`) is transport-agnostic: it turns text into an action and
+a reply, and is fully unit-tested without Telegram or ollama.
+
+### Project layout
+
+```
+segismundo/
+├── segismundo/ # the Python package
+│ ├── __main__.py # entrypoint: wires bot + scheduler + HTTP on one event loop
+│ ├── config.py # env-based configuration
+│ ├── bot.py # Telegram handlers + single-user auth
+│ ├── core.py # the Butler brain (intent → action → reply)
+│ ├── llm.py # ollama: classify / answer / chat / embed
+│ ├── transcribe.py # ffmpeg + whisper.cpp wrapper
+│ ├── timeparse.py # Spanish natural-language time parsing
+│ ├── db.py # SQLite storage (notes, FTS5, embeddings, reminders)
+│ ├── reminders.py # background scheduler
+│ └── http_api.py # /morning + /health endpoints (aiohttp)
+├── scripts/
+│ ├── setup-local.sh # set up everything on THIS machine (Ubuntu) for testing
+│ ├── run-local.sh # run the full bot locally
+│ ├── chat-repl.py # test the brain in a terminal — no Telegram needed
+│ ├── test-transcribe.sh # transcribe an audio file to check Whisper quality
+│ ├── build-whisper.sh # build whisper.cpp + download a model (x86 or ARM)
+│ ├── setup-rpi.sh # runs ON the Pi: install deps + service
+│ └── deploy-rpi.sh # run from your laptop: push code to rpi5 + set up
+├── systemd/segismundo.service
+├── tests/ # offline test suite (pytest)
+├── requirements.txt
+├── .env.example # config template (Raspberry Pi paths)
+└── .env.local.example # config template (local test paths)
+```
+
+---
+
+## Part 1 — Test it on this machine first
+
+Your Ubuntu box already has `ollama`, `docker`, `python3`, `cmake` and `ffmpeg`.
+Everything below stays inside the repo's `./.runtime/` folder (gitignored) — it
+never touches `/opt` or `/var`.
+
+```bash
+cd ~/segismundo
+./scripts/setup-local.sh
+```
+
+This builds whisper.cpp, pulls `qwen2.5:3b` + `nomic-embed-text`, creates a
+`.venv`, and writes `.env.local`.
+
+### 1a. Test the brain with no Telegram
+
+```bash
+source .venv/bin/activate
+./scripts/chat-repl.py
+```
+
+Then type things like:
+
+```
+tú> apunta que la wifi del hotel es Familia2024
+segismundo> 📝 Nota guardada (#1): la wifi del hotel es Familia2024
+
+tú> recuérdame mañana a las 9 llamar al fontanero
+segismundo> ⏰ Recordatorio #1 programado para Sunday 21/06 a las 09:00: llamar al fontanero
+
+tú> ¿cuál era la contraseña del wifi?
+segismundo> Familia2024
+
+tú> ¿estás funcionando?
+segismundo> Sí, estoy funcionando y listo para ayudarte.
+```
+
+### 1b. Test voice transcription
+
+Send yourself a voice note in Telegram and download it, or use any audio file:
+
+```bash
+./scripts/test-transcribe.sh ~/Downloads/nota.ogg
+```
+
+### 1c. Run the full bot locally (optional, needs a token)
+
+1. Create a bot with [@BotFather](https://t.me/BotFather), copy the token.
+2. Put it in `.env.local` as `TELEGRAM_BOT_TOKEN=...`.
+3. Run it:
+ ```bash
+ ./scripts/run-local.sh
+ ```
+4. Message your bot. It will reply with your Telegram **user id** — paste that as
+ `AUTHORIZED_USER_ID=` in `.env.local`, stop (Ctrl-C) and re-run.
+5. Test the morning endpoint:
+ ```bash
+ curl -H "Authorization: Bearer local-test-token" "http://127.0.0.1:8081/morning"
+ ```
+
+### 1d. Run the test suite
+
+```bash
+source .venv/bin/activate
+pip install pytest pytest-asyncio
+pytest -q
+```
+
+---
+
+## Part 2 — Deploy to the Raspberry Pi
+
+Once you've freed up the Pi, deploy from your laptop over Tailscale:
+
+```bash
+cd ~/segismundo
+./scripts/deploy-rpi.sh # target: rpi5 (override with RPI_SSH=user@host)
+```
+
+This will:
+1. `rsync` the code to `rpi5:/opt/segismundo` (it **never** overwrites the Pi's `.env`).
+2. Run `scripts/setup-rpi.sh` on the Pi, which:
+ - installs `git build-essential cmake ffmpeg python3-venv`,
+ - installs & enables **ollama**,
+ - creates a `segismundo` system user,
+ - **builds whisper.cpp** and downloads the `small` model (takes a few minutes on a Pi 5),
+ - creates a venv and installs the app,
+ - pulls `qwen2.5:3b` + `nomic-embed-text`,
+ - installs and enables the **systemd service**.
+
+> **Prerequisite:** SSH access to the Pi with a **sudo-capable user**, and the Pi
+> on your Tailnet as `rpi5`. Passwordless sudo is **not** required — the deploy
+> uses `ssh -t`, so you'll just be prompted for your Pi password once or twice.
+> (If you run the `ssh … sudo …` commands below by hand, keep the `-t` flag so
+> the password prompt works.)
+
+### First-run configuration on the Pi
+
+```bash
+ssh -t rpi5 'sudo nano /opt/segismundo/.env'
+```
+Set at least:
+- `TELEGRAM_BOT_TOKEN` — from BotFather
+- `MORNING_TOKEN` — generate with `openssl rand -hex 16`
+
+Then:
+```bash
+ssh -t rpi5 'sudo systemctl start segismundo'
+ssh rpi5 'journalctl -u segismundo -f' # watch the logs (no sudo needed)
+```
+
+Message your bot once, send `/id`, paste the number as `AUTHORIZED_USER_ID` in
+`.env`, and restart:
+```bash
+ssh -t rpi5 'sudo systemctl restart segismundo'
+```
+
+### Updating later (code only, fast)
+
+```bash
+./scripts/deploy-rpi.sh --code-only # rsync + restart, skips the heavy setup
+```
+
+---
+
+## Part 3 — iPhone automations
+
+There are two kinds of HTTP endpoint, both token-protected the same way.
+
+### Morning briefing (time-based)
+
+In the **Shortcuts** app → **Automation** → create a *Personal Automation*:
+
+1. Trigger: **Time of Day**, e.g. 08:00, “Run Immediately”.
+2. Action: **Get Contents of URL**
+ - URL: `http://rpi5:8081/morning`
+ - Method: `GET`
+ - Headers: `Authorization` = `Bearer <your MORNING_TOKEN>`
+
+That's it: hitting the endpoint pushes the briefing straight to your Telegram, so
+you'll just get a message every morning. (Requires the Tailscale app on the
+iPhone so `rpi5` resolves.)
+
+### Arrival briefing (location-based, per context)
+
+One endpoint per context: `GET /context/<name>` (e.g. `/context/madrid`,
+`/context/burgos`, or `/context/general`). It returns that context's pending
+reminders + recent notes, and pushes them to Telegram.
+
+Create a **Personal Automation** with an **Arrive** trigger:
+
+1. Trigger: **Arrive** → choose your Madrid location (home/office).
+2. Action: **Get Contents of URL**
+ - URL: `http://rpi5:8081/context/madrid`
+ - Headers: `Authorization` = `Bearer <your MORNING_TOKEN>`
+
+Duplicate it with an Arrive-Burgos trigger pointing at `/context/burgos`. Now
+when you land in a city your phone asks the butler what's waiting for you there.
+
+### Notes
+
+Both endpoints respond with JSON if you'd rather *show* it than push it:
+`/morning` → `{ "text", "overdue", "today", "upcoming", "recent_notes" }`;
+`/context/<name>` → `{ "context", "text", "reminders": [...], "notes": [...] }`.
+Add the `push=0` query parameter (e.g. `/morning?push=0`) to suppress the Telegram push. `GET /health` lists the configured
+contexts. An unknown context returns `404` with the list of known ones.
+
+---
+
+## Configuration reference (`.env`)
+
+| Variable | Default | Notes |
+|----------|---------|-------|
+| `TELEGRAM_BOT_TOKEN` | — | **required**, from @BotFather |
+| `AUTHORIZED_USER_ID` | _(empty)_ | your numeric id; while empty the bot only tells you your id |
+| `OLLAMA_HOST` | `http://127.0.0.1:11434` | local ollama |
+| `LLM_MODEL` | `qwen2.5:3b` | understanding + Q&A + chat |
+| `EMBED_MODEL` | `nomic-embed-text` | semantic search; set `USE_EMBEDDINGS=false` to disable |
+| `WHISPER_BIN` / `WHISPER_MODEL` | _(set by setup)_ | whisper.cpp binary + ggml model |
+| `WHISPER_LANG` | `es` | transcription language |
+| `DB_PATH` | `/var/lib/segismundo/segismundo.db` | SQLite file |
+| `HTTP_HOST` / `HTTP_PORT` | `0.0.0.0` / `8081` | morning endpoint |
+| `MORNING_TOKEN` | _(empty)_ | bearer token; if empty the endpoint is open (Tailscale-only) |
+| `TZ` | `Europe/Madrid` | timezone for reminders & summaries |
+| `REMINDER_POLL_SECONDS` | `30` | how often the scheduler checks for due reminders |
+
+### Tuning models for the Pi (8 GB)
+
+`qwen2.5:3b` (~2 GB) + `nomic-embed-text` (~0.3 GB) + `whisper small` (~0.5 GB)
+leave comfortable headroom. If you want better Spanish semantic search and have
+RAM to spare, try `EMBED_MODEL=bge-m3` (`ollama pull bge-m3`, ~1.2 GB). For
+faster (less accurate) transcription, rebuild whisper with the `base` model:
+```bash
+WHISPER_MODEL_NAME=base ./scripts/deploy-rpi.sh
+```
+
+---
+
+## How things work (notes for future-you)
+
+- **Single-user auth:** every handler calls a guard that compares
+ `update.effective_user.id` to `AUTHORIZED_USER_ID`. Strangers get a refusal.
+- **Intent classification** uses ollama *structured output* — a JSON schema is
+ passed as `format=`, so the model is constrained to emit a valid object.
+- **Spanish time parsing** (`timeparse.py`) is the fiddly bit. `dateparser`
+ handles dates but not spoken clock times, so we normalise first: spelled-out
+ numbers (“a las nueve y media” → `09:30`), “menos cuarto”, “de la tarde”,
+ bare times → next occurrence, “el viernes”/“próximo martes”/“que viene”, and a
+ 09:00 default for date-only reminders. All covered by tests.
+- **Q&A robustness:** retrieval is hybrid (FTS5 + cosine over embeddings); the
+ answer prompt allows sensible inference (“la wifi es X” ⇒ that's the password),
+ and past-tense questions are softened to present (small models stumble on
+ “¿cuál *era*…?”).
+- **Reminders never silently vanish:** if a reminder's time can't be parsed, it's
+ saved as a note instead, and you're told.
+- **Memos are kept verbatim:** notes store your *original* words (just lightly
+ trimmed of a leading "apunta que…"), never an LLM paraphrase — so a rambling
+ "tape recorder" brain-dump is preserved in full and stays searchable. Long
+ reflective dumps are always filed as notes, even if they mention things to do.
+- **Auto-tagging:** each saved note gets 1-3 short topic tags from the LLM
+ (`#jardin`, `#trabajo`…), stored in a `tags` column. `/temas` lists them with
+ counts, `/tema <nombre>` browses one. (DB upgrades add the column automatically.)
+- **Contexts (locations):** notes and reminders carry a `context` column
+ (`general` / `burgos` / `madrid`, configurable via `CONTEXTS`). The context is
+ detected deterministically — if the message mentions a configured name it's
+ filed there, otherwise `general`. Listings and questions filter by the context
+ mentioned in the query ("¿qué recordatorios tengo en Madrid?", `/recordatorios
+ madrid`, `/notas 20 burgos`); with none mentioned you see everything, each item
+ tagged 📍. The morning briefing and `/morning` JSON include each item's context.
+ Re-file a mis-categorised item with "mueve la nota 3 a Burgos" / `/contexto
+ nota 3 madrid` (deterministic, checked before the edit handler, undoable).
+- **Date-scoped recall:** `timeparse.parse_date_range` recognises hoy / ayer /
+ anteayer / últimos N días / esta semana / semana pasada / este mes / mes
+ pasado / este año / año pasado, and questions in that window are answered from
+ just those notes.
+- **Export:** `/exportar` dumps every note (timestamp, source, tags, full text)
+ to a Markdown file sent straight to your chat.
+- **Delete, edit & undo:** delete is *soft* (a `deleted` flag, not a real `DELETE`),
+ and every mutating action is appended to an `ops` log. "borra el recordatorio
+ 1", "añade a la nota 3 …", "cambia el recordatorio 2 a las 10" and
+ "deshaz"/"me he equivocado" are matched by deterministic regex **before** the
+ LLM classifier (which mis-stores them as notes), so they're reliable. Edit is
+ checked before delete so "añade … borrador" appends instead of deleting (the
+ word *borrador* merely contains *borra*). Edits snapshot the prior state into
+ `ops.undo_data`, so undo restores the exact previous content/time (notes are
+ re-embedded on restore). `undo_last` walks the log backwards — multi-step,
+ and it survives restarts (SQLite).
+- **Reminder action buttons:** every reminder ping carries an inline keyboard —
+ **✅ Hecho** (marks done, undoable), **⏰ +1h** (snoozes one hour and re-arms),
+ **🗑️** (soft-deletes, undoable). Handled by a `CallbackQueryHandler` that
+ re-checks the single-user auth on each tap.
+
+## Troubleshooting
+
+| Symptom | Check |
+|---------|-------|
+| Bot doesn't reply | `journalctl -u segismundo -f`; is `TELEGRAM_BOT_TOKEN` set? |
+| “No estoy autorizado…” | set `AUTHORIZED_USER_ID` (send `/id`), restart |
+| Voice notes fail | `ffmpeg -version`; `WHISPER_BIN`/`WHISPER_MODEL` paths exist? |
+| LLM errors | `ollama list` shows the models? `systemctl status ollama` |
+| `/morning` 401 | header must be exactly `Authorization: Bearer <MORNING_TOKEN>` |
+| Reminders don't fire | clock/`TZ` correct? scheduler logs say “Reminder scheduler started”? |
--- /dev/null
+[project]
+name = "segismundo"
+version = "0.1.0"
+description = "A personal AI butler for Telegram, running locally on a Raspberry Pi"
+requires-python = ">=3.11"
+dependencies = [
+    "python-telegram-bot>=21,<22",
+    "ollama>=0.4",
+    "aiohttp>=3.9",
+    "numpy>=1.26",
+    "dateparser>=1.2",
+    "python-dotenv>=1.0",
+]
+
+[project.scripts]
+segismundo = "segismundo.__main__:main"
+
+[tool.setuptools]
+# Name the package explicitly. The repo root also contains systemd/, which
+# setuptools' flat-layout auto-discovery would count as a second top-level
+# package and then refuse to build ("Multiple top-level packages discovered").
+packages = ["segismundo"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
--- /dev/null
+python-telegram-bot>=21,<22
+ollama>=0.4
+aiohttp>=3.9
+numpy>=1.26
+dateparser>=1.2
+python-dotenv>=1.0
--- /dev/null
+#!/usr/bin/env bash
+# Clone (or refresh) whisper.cpp, compile it in Release mode and make sure the
+# requested ggml model is on disk. Intended for both x86_64 (your laptop) and
+# arm64 (the Raspberry Pi); SIMD selection is left to whisper.cpp's CMake setup.
+#
+# Usage: build-whisper.sh <install_dir> <model_name>
+#   install_dir : where to clone+build whisper.cpp (e.g. /opt/segismundo/whisper.cpp)
+#   model_name  : tiny | base | small | medium | large-v3-turbo ... (default: small)
+set -euo pipefail
+
+INSTALL_DIR="${1:?usage: build-whisper.sh <install_dir> <model_name>}"
+MODEL="${2:-small}"
+REPO="https://github.com/ggml-org/whisper.cpp"
+
+# Paths relative to the checkout, reused for the checks and the final summary.
+CLI_REL="build/bin/whisper-cli"
+MODEL_REL="models/ggml-${MODEL}.bin"
+
+echo ">> whisper.cpp -> $INSTALL_DIR (model: $MODEL)"
+
+if [ -d "$INSTALL_DIR/.git" ]; then
+  echo ">> repo already present, pulling latest"
+  # Best effort: a failed fast-forward must not abort the build of what we have.
+  git -C "$INSTALL_DIR" pull --ff-only || true
+else
+  mkdir -p "$(dirname "$INSTALL_DIR")"
+  git clone --depth 1 "$REPO" "$INSTALL_DIR"
+fi
+
+cd "$INSTALL_DIR"
+
+# Configure + compile; the CLI is expected at build/bin/whisper-cli afterwards.
+cmake -B build -DCMAKE_BUILD_TYPE=Release
+cmake --build build --config Release -j "$(nproc)"
+
+if [ ! -x "$CLI_REL" ]; then
+  echo "!! build/bin/whisper-cli not found after build" >&2
+  exit 1
+fi
+
+# Fetch the model only when it is not already on disk.
+if [ -f "$MODEL_REL" ]; then
+  echo ">> model ggml-${MODEL}.bin already present"
+else
+  echo ">> downloading model ggml-${MODEL}.bin"
+  bash ./models/download-ggml-model.sh "$MODEL"
+fi
+
+# Print the values to paste into .env.
+printf '%s\n' \
+  ">> Done." \
+  " WHISPER_BIN=$INSTALL_DIR/$CLI_REL" \
+  " WHISPER_MODEL=$INSTALL_DIR/$MODEL_REL"
--- /dev/null
+#!/usr/bin/env python3
+"""Interactive REPL to test Segismundo's brain without Telegram.
+
+Loads .env.local (or .env), spins up the real DB + ollama LLM, and lets you type
+messages as if you were chatting with the bot. Voice notes aren't covered here
+(use scripts/test-transcribe.sh for that), but notes, reminders, questions,
+listings and chit-chat all work.
+
+ ./scripts/chat-repl.py
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from segismundo.config import Config # noqa: E402
+from segismundo.core import Butler # noqa: E402
+from segismundo.db import Database # noqa: E402
+from segismundo.llm import LLM # noqa: E402
+
+
+async def main():
+ env = ".env.local" if os.path.exists(".env.local") else ".env"
+ # The REPL doesn't need a real Telegram token; inject a dummy if missing.
+ os.environ.setdefault("TELEGRAM_BOT_TOKEN", "repl-dummy")
+ cfg = Config.load(env)
+ os.makedirs(cfg.data_dir, exist_ok=True)
+
+ db = Database(cfg.db_path)
+ llm = LLM(cfg.ollama_host, cfg.llm_model, cfg.embed_model,
+ cfg.use_embeddings, cfg.tz)
+ butler = Butler(db, llm, cfg.tz)
+
+ print(f"Segismundo REPL — modelo {cfg.llm_model} en {cfg.ollama_host}")
+ print("Escribe un mensaje (o 'salir'). Prueba: «apunta comprar pan», "
+ "«recuérdame mañana a las 9 ir al banco», «¿qué tengo que comprar?», "
+ "«¿estás funcionando?», «/resumen».\n")
+
+ while True:
+ try:
+ text = input("tú> ").strip()
+ except (EOFError, KeyboardInterrupt):
+ print()
+ break
+ if text.lower() in ("salir", "exit", "quit"):
+ break
+ if not text:
+ continue
+ if text == "/resumen":
+ print("segismundo>", butler.morning_summary_data()[0], "\n")
+ continue
+ if text == "/recordatorios":
+ print("segismundo>", butler.list_reminders(), "\n")
+ continue
+ if text.startswith("/notas"):
+ parts = text.split()
+ n = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else 10
+ print("segismundo>", butler.list_notes(limit=n), "\n")
+ continue
+ if text == "/temas":
+ print("segismundo>", butler.list_topics(), "\n")
+ continue
+ if text.startswith("/tema "):
+ print("segismundo>", butler.notes_for_tag(text.split(maxsplit=1)[1]), "\n")
+ continue
+ if text == "/export":
+ with open("notas-segismundo.md", "w", encoding="utf-8") as f:
+ f.write(butler.export_markdown())
+ print("segismundo> Exportado a notas-segismundo.md\n")
+ continue
+ if text in ("/deshacer", "/undo"):
+ print("segismundo>", await butler.undo_last(), "\n")
+ continue
+ if text.startswith("/borrar "):
+ print("segismundo>", butler.handle_delete("borra " + text.split(maxsplit=1)[1]), "\n")
+ continue
+ reply = await butler.handle_message(text)
+ print("segismundo>", reply, "\n")
+
+ db.close()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
--- /dev/null
+#!/usr/bin/env bash
+# Deploy Segismundo to the Raspberry Pi from this machine, over Tailscale.
+# Copies the repo to rpi5:/opt/segismundo and runs the on-Pi setup.
+#
+# ./scripts/deploy-rpi.sh # deploy to rpi5 (default)
+# RPI_SSH=pi@rpi5 ./scripts/deploy-rpi.sh # custom ssh target
+# ./scripts/deploy-rpi.sh --code-only # sync code + restart, skip full setup
+#
+# Requirements on the Pi: SSH access and a sudo-capable user. Passwordless sudo
+# is NOT required — every remote sudo runs over an interactive TTY (ssh -t), so
+# you'll just be prompted for your Pi password once or twice during a deploy.
+set -euo pipefail
+# Work from the repo root so the rsync source "./" below is the whole project.
+cd "$(dirname "$0")/.."
+
+RPI_SSH="${RPI_SSH:-rpi5}"
+APP_DIR="/opt/segismundo"
+CODE_ONLY=0
+# Only the first positional argument is inspected.
+[ "${1:-}" = "--code-only" ] && CODE_ONLY=1
+
+echo "==> Target: $RPI_SSH:$APP_DIR"
+echo "==> Checking connectivity"
+# The \$(...) substitutions are escaped so whoami/hostname run on the Pi.
+# With `set -e`, an unreachable host aborts the deploy here.
+ssh -o ConnectTimeout=8 "$RPI_SSH" "echo connected as \$(whoami) on \$(hostname)"
+
+echo "==> Ensuring $APP_DIR exists and is owned by the deploy user (may ask for your Pi sudo password)"
+# Recursive chown: setup-rpi.sh hands the whole tree to the 'segismundo' service
+# user, and pip drops a cache under it, so a later rsync as this user would hit
+# 'Permission denied'. Reclaim ownership before syncing; setup re-chowns at the end.
+ssh -t "$RPI_SSH" "sudo mkdir -p $APP_DIR && sudo chown -R \$(whoami) $APP_DIR"
+
+echo "==> Syncing code (preserving remote .env, skipping local runtime)"
+# rsync does not delete excluded paths on the receiver unless --delete-excluded
+# is given, so the Pi's .env, .venv and whisper.cpp build survive --delete.
+rsync -az --delete \
+ --exclude '.git' \
+ --exclude '.runtime' \
+ --exclude '.venv' \
+ --exclude '.env' \
+ --exclude '.env.local' \
+ --exclude '__pycache__' \
+ --exclude 'whisper.cpp' \
+ ./ "$RPI_SSH:$APP_DIR/"
+
+if [ "$CODE_ONLY" -eq 1 ]; then
+ echo "==> Restarting service (code-only; may ask for your Pi sudo password)"
+ # Only the restart needs sudo; the status query runs as the ssh user.
+ ssh -t "$RPI_SSH" "sudo systemctl restart segismundo && systemctl --no-pager status segismundo | head -n 5"
+ echo "✅ Code updated and service restarted."
+ exit 0
+fi
+
+echo "==> Running full setup on the Pi (this builds whisper.cpp + pulls models)"
+# Forward model overrides through ssh + sudo (sudo resets the env, so pass them
+# as inline VAR=value assignments on the sudo command line).
+# NOTE(review): the values are interpolated unquoted into the remote command
+# line, so they must not contain spaces or shell metacharacters.
+REMOTE_ENV=""
+for v in WHISPER_MODEL_NAME LLM_MODEL EMBED_MODEL; do
+ # ${!v} is bash indirect expansion: the value of the variable named by $v.
+ [ -n "${!v:-}" ] && REMOTE_ENV="$REMOTE_ENV $v=${!v}"
+done
+ssh -t "$RPI_SSH" "sudo${REMOTE_ENV} bash $APP_DIR/scripts/setup-rpi.sh"
+
+cat <<EOF
+
+✅ Deploy finished.
+
+Next on the Pi (if first time):
+ ssh -t $RPI_SSH 'sudo nano /opt/segismundo/.env' # set token + MORNING_TOKEN
+ ssh -t $RPI_SSH 'sudo systemctl start segismundo'
+ ssh $RPI_SSH 'journalctl -u segismundo -f'
+
+Then message your bot, send /id, paste the number as AUTHORIZED_USER_ID, and:
+ ssh -t $RPI_SSH 'sudo systemctl restart segismundo'
+EOF
--- /dev/null
+#!/usr/bin/env bash
+# Launch the complete Segismundo bot on this machine, configured from .env.local.
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+# Both of these are produced by setup-local.sh; bail out early without them.
+if [ ! -f .env.local ]; then
+  echo "Missing .env.local — run ./scripts/setup-local.sh first"
+  exit 1
+fi
+if [ ! -d .venv ]; then
+  echo "Missing .venv — run ./scripts/setup-local.sh first"
+  exit 1
+fi
+
+# shellcheck disable=SC1091
+. .venv/bin/activate
+
+# Export every variable assigned while .env.local is being read.
+set -o allexport
+# shellcheck disable=SC1091
+. .env.local
+set +o allexport
+
+# The full bot cannot start without a Telegram token; point at the REPL instead.
+if [ -z "${TELEGRAM_BOT_TOKEN:-}" ]; then
+  printf '%s\n' \
+    "TELEGRAM_BOT_TOKEN is empty in .env.local." \
+    "Add your @BotFather token to chat over Telegram, or use ./scripts/chat-repl.py instead."
+  exit 1
+fi
+
+echo "Starting Segismundo locally (HTTP on http://${HTTP_HOST}:${HTTP_PORT}/morning)…"
+# Replace this shell with the bot process so signals reach it directly.
+exec python -m segismundo
--- /dev/null
+#!/usr/bin/env bash
+# Set up Segismundo for local testing on this Ubuntu machine.
+# Everything goes under ./.runtime (gitignored) so it never touches /opt or /var.
+#
+# ./scripts/setup-local.sh
+#
+# Optional overrides (environment): WHISPER_MODEL_NAME, LLM_MODEL, EMBED_MODEL.
+#
+# Prereqs (already present on your box): ollama, python3, git, cmake, ffmpeg.
+set -euo pipefail
+cd "$(dirname "$0")/.." # repo root
+
+RUNTIME="$PWD/.runtime"
+WHISPER_DIR="$RUNTIME/whisper.cpp"
+MODEL="${WHISPER_MODEL_NAME:-small}"
+LLM_MODEL="${LLM_MODEL:-qwen2.5:3b}"
+EMBED_MODEL="${EMBED_MODEL:-nomic-embed-text}"
+
+# Succeed when model "$1" already shows up in `ollama list`.
+# The listing is captured before matching: with `set -o pipefail`, piping
+# straight into `grep -q` can report failure when grep exits at the first match
+# and the producer is killed by SIGPIPE, triggering a needless pull. -F matches
+# the name literally, so the "." in "qwen2.5:3b" is not a regex wildcard.
+has_model() {
+  local listing
+  listing="$(ollama list)"
+  grep -Fq -- "$1" <<<"$listing"
+}
+
+echo "==> 1/6 Checking system tools"
+for bin in git cmake ffmpeg python3 ollama; do
+  command -v "$bin" >/dev/null || { echo "Missing: $bin"; exit 1; }
+done
+
+echo "==> 2/6 Python virtualenv (.venv) + deps"
+python3 -m venv .venv
+# shellcheck disable=SC1091
+source .venv/bin/activate
+pip install --upgrade pip >/dev/null
+pip install -r requirements.txt
+
+echo "==> 3/6 Building whisper.cpp (model: $MODEL)"
+mkdir -p "$RUNTIME"
+bash scripts/build-whisper.sh "$WHISPER_DIR" "$MODEL"
+
+echo "==> 4/6 Pulling ollama models"
+has_model "$LLM_MODEL" || ollama pull "$LLM_MODEL"
+has_model "$EMBED_MODEL" || ollama pull "$EMBED_MODEL"
+
+echo "==> 5/6 Data dir"
+mkdir -p "$RUNTIME/data"
+
+echo "==> 6/6 .env.local"
+# Never overwrite an existing .env.local: it may already hold the user's token.
+if [ ! -f .env.local ]; then
+  cp .env.local.example .env.local
+  echo " Created .env.local — add your TELEGRAM_BOT_TOKEN to chat over Telegram."
+  echo " (Not needed for ./scripts/chat-repl.py)"
+fi
+
+cat <<EOF
+
+✅ Local setup complete.
+
+Test WITHOUT Telegram (just the brain):
+ source .venv/bin/activate
+ ./scripts/chat-repl.py
+
+Test transcription on an audio file:
+ ./scripts/test-transcribe.sh path/to/audio.ogg
+
+Run the FULL bot locally (needs a token in .env.local):
+ ./scripts/run-local.sh
+
+Run unit tests:
+ source .venv/bin/activate && pip install pytest pytest-asyncio && pytest -q
+EOF
--- /dev/null
+#!/usr/bin/env bash
+# Runs ON the Raspberry Pi (Raspberry Pi OS Lite, arm64). Installs system deps,
+# ollama, builds whisper.cpp, pulls models, creates a venv, installs the app, and
+# enables the systemd service.
+#
+# Intended to be invoked by scripts/deploy-rpi.sh, but you can also run it by hand:
+# sudo ./scripts/setup-rpi.sh
+#
+# Optional overrides (environment): WHISPER_MODEL_NAME, LLM_MODEL, EMBED_MODEL.
+#
+# Assumes the repo has already been copied to /opt/segismundo (deploy does this).
+set -euo pipefail
+
+APP_DIR="/opt/segismundo"
+DATA_DIR="/var/lib/segismundo"
+WHISPER_DIR="$APP_DIR/whisper.cpp"
+SERVICE_USER="segismundo"
+MODEL="${WHISPER_MODEL_NAME:-small}"
+LLM_MODEL="${LLM_MODEL:-qwen2.5:3b}"
+EMBED_MODEL="${EMBED_MODEL:-nomic-embed-text}"
+
+# Everything below (apt, useradd, systemctl, /etc writes) needs root.
+if [ "$(id -u)" -ne 0 ]; then echo "Run with sudo."; exit 1; fi
+
+echo "==> 1/8 System packages"
+apt-get update
+apt-get install -y --no-install-recommends \
+ git build-essential cmake ffmpeg python3 python3-venv python3-dev curl ca-certificates
+
+echo "==> 2/8 ollama"
+# Install only when the binary is missing, so re-running setup does not reinstall.
+# NOTE(review): this pipes a remote installer straight into a root shell.
+if ! command -v ollama >/dev/null; then
+ curl -fsSL https://ollama.com/install.sh | sh
+fi
+systemctl enable --now ollama
+
+echo "==> 3/8 Service user"
+# System account with no login shell; its home is the application directory.
+if ! id "$SERVICE_USER" >/dev/null 2>&1; then
+ useradd --system --home "$APP_DIR" --shell /usr/sbin/nologin "$SERVICE_USER"
+fi
+
+echo "==> 4/8 Directories"
+mkdir -p "$APP_DIR" "$DATA_DIR"
+# Hand both trees to the service user; the steps below run as that user.
+chown -R "$SERVICE_USER:$SERVICE_USER" "$APP_DIR" "$DATA_DIR"
+
+echo "==> 5/8 whisper.cpp (model: $MODEL)"
+sudo -u "$SERVICE_USER" bash "$APP_DIR/scripts/build-whisper.sh" "$WHISPER_DIR" "$MODEL"
+
+echo "==> 6/8 Python venv + app deps"
+# The service user's HOME is $APP_DIR, so pip would drop its cache there and break
+# the next rsync deploy. Keep the cache under the data dir instead.
+PIP_CACHE_DIR="$DATA_DIR/pip-cache"
+sudo -u "$SERVICE_USER" python3 -m venv "$APP_DIR/.venv"
+sudo -u "$SERVICE_USER" PIP_CACHE_DIR="$PIP_CACHE_DIR" "$APP_DIR/.venv/bin/pip" install --upgrade pip
+sudo -u "$SERVICE_USER" PIP_CACHE_DIR="$PIP_CACHE_DIR" "$APP_DIR/.venv/bin/pip" install -r "$APP_DIR/requirements.txt"
+
+echo "==> 7/8 ollama models (this can take a while on first run)"
+sudo -u "$SERVICE_USER" ollama pull "$LLM_MODEL"
+sudo -u "$SERVICE_USER" ollama pull "$EMBED_MODEL"
+
+echo "==> 8/8 systemd service"
+# First run only: seed .env from the template and restrict it to the service
+# user, since it will hold the bot token. An existing .env is left alone.
+if [ ! -f "$APP_DIR/.env" ]; then
+ cp "$APP_DIR/.env.example" "$APP_DIR/.env"
+ chown "$SERVICE_USER:$SERVICE_USER" "$APP_DIR/.env"
+ chmod 600 "$APP_DIR/.env"
+ echo " !! Created $APP_DIR/.env — edit it (token, AUTHORIZED_USER_ID, MORNING_TOKEN) then:"
+ echo " sudo systemctl restart segismundo"
+fi
+
+# Keep WHISPER_MODEL aligned with the model we just built/downloaded ($MODEL), so
+# the running service uses it rather than whatever .env happened to contain. This
+# is what the service actually reads; WHISPER_MODEL_NAME only controls the build.
+WHISPER_MODEL_PATH="$WHISPER_DIR/models/ggml-${MODEL}.bin"
+# Replace the existing line in place, or append one if .env has none.
+# "|" is the sed delimiter because the path itself contains "/".
+if grep -q '^WHISPER_MODEL=' "$APP_DIR/.env"; then
+ sed -i "s|^WHISPER_MODEL=.*|WHISPER_MODEL=$WHISPER_MODEL_PATH|" "$APP_DIR/.env"
+else
+ echo "WHISPER_MODEL=$WHISPER_MODEL_PATH" >> "$APP_DIR/.env"
+fi
+echo " WHISPER_MODEL -> $WHISPER_MODEL_PATH"
+
+# Install the unit and enable it at boot. It is deliberately not started here:
+# .env still needs the token (see the steps printed below).
+install -m 644 "$APP_DIR/systemd/segismundo.service" /etc/systemd/system/segismundo.service
+systemctl daemon-reload
+systemctl enable segismundo
+
+echo
+echo "✅ Setup done."
+echo " 1) Edit /opt/segismundo/.env (TELEGRAM_BOT_TOKEN, MORNING_TOKEN; AUTHORIZED_USER_ID can wait)"
+echo " 2) sudo systemctl start segismundo"
+echo " 3) Message your bot once; send /id to learn AUTHORIZED_USER_ID, put it in .env, restart."
+echo " 4) Logs: journalctl -u segismundo -f"
--- /dev/null
+#!/usr/bin/env bash
+# Transcribe an audio file with the locally-built whisper.cpp, exactly the way
+# the bot does (ffmpeg -> 16kHz mono WAV -> whisper-cli). Handy for checking
+# transcription quality before going live.
+#
+# ./scripts/test-transcribe.sh path/to/voice.ogg [lang]
+#
+# lang defaults to $WHISPER_LANG when set, otherwise "es". WHISPER_BIN and
+# WHISPER_MODEL may be overridden from the environment.
+set -euo pipefail
+
+AUDIO="${1:?usage: test-transcribe.sh <audio-file> [lang]}"
+# Resolve a relative audio path against the caller's directory BEFORE changing
+# to the repo root, so the script works from any working directory.
+case "$AUDIO" in
+  /*) ;;
+  *) AUDIO="$PWD/$AUDIO" ;;
+esac
+
+# Deliberately not named LANG: that is the locale variable, and overwriting it
+# with e.g. "es" would change the locale of bash and of ffmpeg/whisper-cli.
+SPEECH_LANG="${2:-${WHISPER_LANG:-es}}"
+
+cd "$(dirname "$0")/.."
+
+WHISPER_BIN="${WHISPER_BIN:-$PWD/.runtime/whisper.cpp/build/bin/whisper-cli}"
+WHISPER_MODEL="${WHISPER_MODEL:-$PWD/.runtime/whisper.cpp/models/ggml-small.bin}"
+
+[ -x "$WHISPER_BIN" ] || { echo "whisper-cli not found at $WHISPER_BIN (run setup-local.sh)"; exit 1; }
+[ -f "$WHISPER_MODEL" ] || { echo "model not found at $WHISPER_MODEL"; exit 1; }
+[ -f "$AUDIO" ] || { echo "audio not found: $AUDIO"; exit 1; }
+
+# Scratch WAV, removed on any exit path.
+TMP="$(mktemp --suffix=.wav)"
+trap 'rm -f "$TMP"' EXIT
+
+echo ">> Converting to 16kHz mono WAV…"
+# ffmpeg's output is silenced, so report a failure explicitly instead of
+# letting `set -e` end the script without a message.
+if ! ffmpeg -nostdin -y -i "$AUDIO" -ar 16000 -ac 1 -c:a pcm_s16le "$TMP" \
+  >/dev/null 2>&1; then
+  echo "ffmpeg could not convert $AUDIO (unsupported or corrupt audio?)" >&2
+  exit 1
+fi
+
+echo ">> Transcribing ($SPEECH_LANG)…"
+"$WHISPER_BIN" -m "$WHISPER_MODEL" -f "$TMP" -l "$SPEECH_LANG" -t "$(nproc)" -np -nt
+echo
--- /dev/null
+"""Segismundo — a personal AI butler that lives on a Raspberry Pi.
+
+Talks to its owner over Telegram, transcribes voice notes with whisper.cpp,
+understands them with a local LLM (ollama), stores searchable notes and
+reminders in SQLite, and exposes a morning-briefing HTTP endpoint.
+"""
+
+__version__ = "0.1.0"
--- /dev/null
+"""Entry point: wire DB + LLM + transcriber + bot + scheduler + HTTP together.
+
+Runs everything on a single asyncio event loop. Telegram is consumed via long
+polling (no public webhook needed — perfect behind Tailscale).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import signal
+from datetime import datetime
+
+from telegram import Update
+
+from .bot import build_application, reminder_keyboard, set_menu_commands
+from .config import Config
+from .core import Butler
+from .db import Database
+from .http_api import start_http
+from .llm import LLM
+from .reminders import scheduler_loop
+from .transcribe import Transcriber
+
# LOG_LEVEL is trimmed and upper-cased because logging.basicConfig() raises
# ValueError for level names such as "info"; a blank value falls back to INFO.
# This reads the process environment only: it runs at import time, before
# Config.load() has had a chance to read a .env file.
logging.basicConfig(
    level=(os.environ.get("LOG_LEVEL") or "INFO").strip().upper() or "INFO",
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)
log = logging.getLogger("segismundo")
+
+
async def run() -> None:
    """Start every component on the current event loop and run until signalled.

    Loads the configuration, creates the data directories, builds the database,
    LLM client, transcriber, butler and Telegram application, then starts
    Telegram polling, the HTTP endpoint and the reminder scheduler. The
    coroutine blocks until SIGTERM/SIGINT sets the stop event.

    Everything that was started is torn down in ``finally`` blocks, so a
    failure half-way through start-up (or an unexpected error while running)
    no longer leaves the polling loop, the HTTP runner, the scheduler task or
    the database connection open.
    """
    cfg = Config.load()
    os.makedirs(cfg.data_dir, exist_ok=True)
    os.makedirs(os.path.dirname(cfg.db_path) or ".", exist_ok=True)

    db = Database(cfg.db_path)
    try:
        llm = LLM(cfg.ollama_host, cfg.llm_model, cfg.embed_model,
                  cfg.use_embeddings, cfg.tz)
        transcriber = Transcriber(cfg.whisper_bin, cfg.whisper_model, cfg.whisper_lang)
        butler = Butler(db, llm, cfg.tz, contexts=cfg.contexts)

        app = build_application(cfg, butler, transcriber)

        async def send_to_owner(text: str) -> None:
            """Push a plain text message to the configured owner, if any."""
            if cfg.authorized_user_id is None:
                log.warning("Cannot push message: AUTHORIZED_USER_ID not set")
                return
            await app.bot.send_message(chat_id=cfg.authorized_user_id, text=text)

        async def notify_reminder(rem) -> None:
            """Send a due reminder to the owner together with its action buttons."""
            if cfg.authorized_user_id is None:
                log.warning("Cannot push reminder: AUTHORIZED_USER_ID not set")
                return
            local = datetime.fromisoformat(rem.due_at).astimezone(cfg.tz)
            msg = (f"⏰ Recordatorio: {rem.text}\n"
                   f"(programado para {local.strftime('%d/%m %H:%M')})")
            await app.bot.send_message(
                chat_id=cfg.authorized_user_id, text=msg,
                reply_markup=reminder_keyboard(rem.id),
            )

        stop = asyncio.Event()
        http_runner = None
        sched = None

        async with app:
            try:
                await app.start()
                await app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
                await set_menu_commands(app)
                log.info("Telegram polling started")

                http_runner = await start_http(
                    butler, send_to_owner, cfg.http_host, cfg.http_port, cfg.morning_token
                )
                sched = asyncio.create_task(
                    scheduler_loop(db, notify_reminder, cfg.tz, cfg.reminder_poll_seconds)
                )

                # Graceful shutdown on SIGTERM/SIGINT (systemd sends SIGTERM).
                loop = asyncio.get_running_loop()
                for sig in (signal.SIGTERM, signal.SIGINT):
                    try:
                        loop.add_signal_handler(sig, stop.set)
                    except NotImplementedError:
                        # Some event loops cannot install signal handlers.
                        pass

                log.info("Segismundo is up. Press Ctrl-C to stop.")
                await stop.wait()
            finally:
                log.info("Shutting down…")
                if sched is not None:
                    sched.cancel()
                    # Await the task so its cancellation (or an earlier crash)
                    # is actually observed instead of being dropped.
                    try:
                        await sched
                    except asyncio.CancelledError:
                        pass
                    except Exception:
                        log.exception("Reminder scheduler ended with an error")
                if http_runner is not None:
                    await http_runner.cleanup()
                # stop() raises if the component never started, so check first.
                if app.updater is not None and app.updater.running:
                    await app.updater.stop()
                if app.running:
                    await app.stop()
    finally:
        db.close()
+
+
def main() -> None:
    """Run the service; exit quietly on Ctrl-C or a clean ``SystemExit``.

    A ``SystemExit`` carrying a non-zero / non-None code is re-raised so the
    process still reports the failure.
    """
    try:
        asyncio.run(run())
    except KeyboardInterrupt:
        return
    except SystemExit as exc:
        if exc.code not in (0, None):
            raise
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+"""Telegram bot: single-user auth, voice + text handling, commands.
+
+Only the configured AUTHORIZED_USER_ID is served. If that id is not configured,
+the bot tells whoever messages it their own user id (so you can set it once and
+restart) and refuses to do anything else.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from datetime import datetime, timedelta, timezone
+
+from telegram import (
+ BotCommand,
+ InlineKeyboardButton,
+ InlineKeyboardMarkup,
+ Update,
+)
+from telegram.constants import ChatAction
+from telegram.ext import (
+ Application,
+ CallbackQueryHandler,
+ CommandHandler,
+ ContextTypes,
+ MessageHandler,
+ filters,
+)
+
+from .config import Config
+from .core import Butler
+from .transcribe import Transcriber
+
+log = logging.getLogger("segismundo.bot")
+
+
def _context_arg(args, butler: Butler) -> str | None:
    """Return the first command argument that names a known context.

    The match is case-insensitive and the lower-cased name is returned
    (``/notas Madrid`` -> ``"madrid"``). ``args`` may be ``None`` or empty;
    ``None`` is returned when nothing matches ``butler.contexts``.
    """
    return next(
        (arg.lower() for arg in (args or []) if arg.lower() in butler.contexts),
        None,
    )
+
+
def _is_authorized(update: Update, cfg: Config) -> bool:
    """Tell whether the update's sender is the configured owner.

    False when the update has no user, when no owner id is configured, or
    when the ids differ.
    """
    sender = update.effective_user
    if not sender:
        return False
    owner_id = cfg.authorized_user_id
    if not owner_id:
        return False
    return bool(sender.id == owner_id)
+
+
async def _guard(update: Update, cfg: Config) -> bool:
    """Return True if the message may be processed; otherwise reply and stop.

    Two refusal cases, both answered on the incoming message:

    * no AUTHORIZED_USER_ID configured: the sender is told their own Telegram
      id so it can be put in the .env file;
    * sender is not the configured owner: the attempt is logged and rejected.

    Updates without a user no longer crash the unlinked branch; the id is
    shown as "?" in that case, mirroring the fallback used in the log line.
    """
    user = update.effective_user
    if cfg.authorized_user_id is None:
        user_id = user.id if user else "?"
        await update.effective_message.reply_text(
            "Segismundo aún no está vinculado a ningún usuario.\n"
            f"Tu ID de Telegram es: {user_id}\n\n"
            "Pon AUTHORIZED_USER_ID con ese número en el .env y reinicia el servicio."
        )
        return False
    if not _is_authorized(update, cfg):
        log.warning("Rejected message from unauthorized user %s", user.id if user else "?")
        await update.effective_message.reply_text("No estoy autorizado a hablar contigo.")
        return False
    return True
+
+
+# ---------------- command handlers ----------------
+
async def cmd_start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply with the welcome/help text (authorized owner only).

    The context examples are derived from ``cfg.contexts`` instead of being
    hard-coded, so the help stays accurate when CONTEXTS is customised. With
    the default contexts (burgos, madrid) the text is unchanged.
    """
    cfg: Config = context.bot_data["cfg"]
    if not await _guard(update, cfg):
        return

    names = [c.capitalize() for c in (cfg.contexts or [])]
    example_arg = cfg.contexts[-1] if cfg.contexts else "madrid"
    if names:
        # "Burgos o Madrid" / "A, B o C"
        mention = names[0] if len(names) == 1 else ", ".join(names[:-1]) + " o " + names[-1]
        contexts_help = (
            f"📍 Contextos: si mencionas {mention}, lo archivo ahí. Luego "
            f"pregunta «¿qué recordatorios tengo en {names[-1]}?», o mueve algo con «mueve "
            f"la nota 3 a {names[0]}» (/contexto). "
        )
    else:
        contexts_help = ""

    await update.effective_message.reply_text(
        "👋 Soy Segismundo, tu mayordomo.\n\n"
        "Mándame notas (texto o voz), pídeme recordatorios o pregúntame sobre "
        "lo que te he guardado.\n\n"
        "También puedes pedirme cosas por fecha: «¿qué grabé ayer?», «mis notas "
        "de la semana pasada».\n\n"
        "Comandos:\n"
        f"/recordatorios [contexto] — ver pendientes (p.ej. /recordatorios {example_arg})\n"
        "/notas [N] [contexto] — ver tus N notas más recientes\n"
        "/temas — ver tus temas (etiquetas automáticas)\n"
        "/tema <nombre> — ver las notas de un tema\n"
        "/buscar <texto> — buscar en tus notas\n"
        "/exportar — descargar todas tus notas en Markdown\n"
        "/resumen — resumen tipo mañanero\n"
        "/hecho <id> — marcar recordatorio como hecho\n"
        "/editar nota|recordatorio <id> ... — editar algo\n"
        "/borrar nota|recordatorio <id> — borrar algo\n"
        "/deshacer — deshacer la última operación\n"
        "/id — ver tu ID de Telegram\n\n"
        "También en lenguaje normal: «añade a la nota 3 …», «cambia el "
        "recordatorio 2 a las 10», «borra el recordatorio 1», «deshaz».\n\n"
        f"{contexts_help}Cada recordatorio trae botones "
        "✅/⏰/🗑️ cuando salta."
    )
+
+
async def cmd_id(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Tell the sender their Telegram user id (no authorization check)."""
    sender_id = update.effective_user.id
    reply = f"Tu ID de Telegram es: {sender_id}"
    await update.effective_message.reply_text(reply)
+
+
async def cmd_reminders(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """List reminders, optionally limited to a context named in the arguments."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    wanted_context = _context_arg(context.args, butler)
    listing = butler.list_reminders(context=wanted_context)
    await update.effective_message.reply_text(listing)
+
+
async def cmd_notes(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """List the most recent notes: ``/notas [N] [contexto]``.

    N defaults to 10 and is clamped to 1..100. The count is taken from the
    first plain-ASCII numeric argument wherever it appears, so
    ``/notas madrid 5`` works as well as ``/notas 5 madrid``. Requiring ASCII
    digits avoids ``int()`` raising on characters such as "²", for which
    ``str.isdigit()`` is true.
    """
    cfg: Config = context.bot_data["cfg"]
    if not await _guard(update, cfg):
        return
    butler: Butler = context.bot_data["butler"]
    limit = 10
    for arg in context.args or []:
        if arg.isascii() and arg.isdigit():
            limit = max(1, min(int(arg), 100))
            break
    ctx = _context_arg(context.args, butler)
    await update.effective_message.reply_text(butler.list_notes(limit=limit, context=ctx))
+
+
async def cmd_topics(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply with the butler's topic listing (authorized owner only)."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    topics = butler.list_topics()
    await update.effective_message.reply_text(topics)
+
+
async def cmd_tag(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Show the notes for one topic: ``/tema <nombre>``.

    Only the first argument is used as the topic name; without arguments a
    usage hint is sent instead.
    """
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    message = update.effective_message
    if context.args:
        await message.reply_text(butler.notes_for_tag(context.args[0]))
    else:
        await message.reply_text("Uso: /tema <nombre> (ver /temas)")
+
+
async def cmd_export(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Send the butler's Markdown export as a document attachment."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    markdown = butler.export_markdown()
    import io

    file_name = "notas-segismundo.md"
    attachment = io.BytesIO(markdown.encode("utf-8"))
    attachment.name = file_name
    await update.effective_message.reply_document(
        document=attachment,
        filename=file_name,
        caption="🗒️ Aquí tienes todas tus notas.",
    )
+
+
async def cmd_search(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Answer ``/buscar <texto>`` through the butler's question path.

    All arguments are joined with spaces to form the query; an empty query
    gets a usage hint. A typing indicator is shown while the answer is built.
    """
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    terms = " ".join(context.args or [])
    if terms == "":
        await update.effective_message.reply_text("Uso: /buscar <texto a buscar>")
        return
    await update.effective_chat.send_action(ChatAction.TYPING)
    answer = await butler._answer_question(terms)
    await update.effective_message.reply_text(answer)
+
+
async def cmd_summary(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply with the text part of the butler's morning summary."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    # morning_summary_data() returns a pair; only the first item is sent.
    summary_text, _unused = butler.morning_summary_data()
    await update.effective_message.reply_text(summary_text)
+
+
async def cmd_done(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Mark a reminder as done: ``/hecho <id>``.

    The id must be plain ASCII digits. ``str.isdigit()`` alone also accepts
    characters such as "²", which ``int()`` rejects with ValueError, so the
    argument is additionally required to be ASCII before it is converted.
    """
    cfg: Config = context.bot_data["cfg"]
    if not await _guard(update, cfg):
        return
    butler: Butler = context.bot_data["butler"]
    raw_id = context.args[0] if context.args else ""
    if not (raw_id.isascii() and raw_id.isdigit()):
        await update.effective_message.reply_text("Uso: /hecho <id del recordatorio>")
        return
    ok = butler.mark_reminder_done(int(raw_id))
    await update.effective_message.reply_text(
        "✅ Marcado como hecho. («deshaz» para revertir)" if ok
        else "No encontré ese recordatorio."
    )
+
+
async def cmd_undo(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Undo the last operation and reply with the butler's message."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    outcome = await butler.undo_last()
    await update.effective_message.reply_text(outcome)
+
+
async def cmd_delete(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Delete a note or reminder: ``/borrar nota|recordatorio <id>``.

    The arguments are turned into a "borra …" sentence and passed to the
    butler's natural-language delete handler.
    """
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    message = update.effective_message
    if not context.args:
        await message.reply_text(
            "Uso: /borrar nota <id> ó /borrar recordatorio <id>")
        return
    sentence = "borra " + " ".join(context.args)
    await message.reply_text(butler.handle_delete(sentence))
+
+
async def cmd_edit(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Edit a note or reminder: ``/editar nota|recordatorio <id> …``.

    Needs at least two arguments; they are turned into a "cambia …" sentence
    and passed to the butler's edit handler.
    """
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    message = update.effective_message
    if len(context.args) < 2:
        await message.reply_text(
            "Uso: /editar nota <id> <nuevo texto> ó "
            "/editar recordatorio <id> <nueva hora/texto>")
        return
    sentence = "cambia " + " ".join(context.args)
    outcome = await butler.handle_edit(sentence)
    await message.reply_text(outcome)
+
+
async def cmd_context(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Move a note or reminder to another context: ``/contexto nota <id> <ctx>``.

    Needs at least two arguments; they are turned into a "mueve …" sentence
    and passed to the butler's move handler.
    """
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    message = update.effective_message
    if len(context.args) < 2:
        await message.reply_text(
            "Uso: /contexto nota <id> <madrid|burgos|general> ó "
            "/contexto recordatorio <id> <contexto>")
        return
    sentence = "mueve " + " ".join(context.args)
    await message.reply_text(butler.handle_move(sentence))
+
+
+# ---------------- message handlers ----------------
+
async def on_text(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Pass a plain text message to the butler and send back its reply."""
    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    message = update.effective_message
    # Show "typing…" while the butler works on the message.
    await update.effective_chat.send_action(ChatAction.TYPING)
    answer = await butler.handle_message(message.text, source="text")
    await message.reply_text(answer)
+
+
async def on_voice(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Transcribe a voice/audio message and process it like typed text.

    The audio is downloaded to a temporary path obtained from the transcriber
    and transcribed; the temporary file is removed whether or not that
    succeeds. On failure an apology is sent. Otherwise the transcript is
    echoed back first and then passed to the butler with ``source="voice"``.
    """
    import os

    if not await _guard(update, context.bot_data["cfg"]):
        return
    butler: Butler = context.bot_data["butler"]
    transcriber: Transcriber = context.bot_data["transcriber"]

    msg = update.effective_message
    clip = msg.voice or msg.audio
    if clip is None:
        return

    chat = update.effective_chat
    await chat.send_action(ChatAction.TYPING)
    audio_path = transcriber.tempfile()
    try:
        remote_file = await clip.get_file()
        await remote_file.download_to_drive(audio_path)
        transcript = await transcriber.transcribe(audio_path)
    except Exception:
        log.exception("Transcription failed")
        await msg.reply_text("No he podido transcribir la nota de voz. 😕")
        return
    finally:
        # Best-effort cleanup: the file may never have been created.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    if not transcript:
        await msg.reply_text("No he entendido nada en la nota de voz.")
        return

    await msg.reply_text(f"🎙️ Te he entendido:\n«{transcript}»")
    await chat.send_action(ChatAction.TYPING)
    answer = await butler.handle_message(transcript, source="voice")
    await msg.reply_text(answer)
+
+
# (command, description) pairs for Telegram's "/" autocomplete menu, in display
# order. Only the Spanish primary names are published.
_MENU_ENTRIES = (
    ("recordatorios", "Ver recordatorios pendientes"),
    ("notas", "Ver notas recientes (/notas N)"),
    ("temas", "Ver tus temas/etiquetas"),
    ("tema", "Ver notas de un tema (/tema jardin)"),
    ("buscar", "Buscar en tus notas"),
    ("exportar", "Exportar todas las notas a Markdown"),
    ("resumen", "Resumen de lo pendiente"),
    ("hecho", "Marcar un recordatorio como hecho (/hecho <id>)"),
    ("editar", "Editar nota/recordatorio (/editar nota 3 ...)"),
    ("contexto", "Mover a otro contexto (/contexto nota 3 madrid)"),
    ("borrar", "Borrar nota/recordatorio (/borrar nota 3)"),
    ("deshacer", "Deshacer la última operación"),
    ("id", "Ver tu ID de Telegram"),
    ("start", "Iniciar y ver la ayuda"),
)
MENU_COMMANDS = [BotCommand(command, description) for command, description in _MENU_ENTRIES]
+
+
async def set_menu_commands(app: Application) -> None:
    """Register MENU_COMMANDS with Telegram so they appear in the "/" menu."""
    bot = app.bot
    await bot.set_my_commands(MENU_COMMANDS)
+
+
+# ---------------- reminder action buttons ----------------
+
def reminder_keyboard(rid: int) -> InlineKeyboardMarkup:
    """Build the one-row inline keyboard attached to a reminder notification.

    Each button carries ``rem:<action>:<rid>`` as callback data, where the
    action is ``done``, ``snooze`` or ``del``.
    """
    buttons = (("✅ Hecho", "done"), ("⏰ +1h", "snooze"), ("🗑️", "del"))
    row = [
        InlineKeyboardButton(label, callback_data=f"rem:{action}:{rid}")
        for label, action in buttons
    ]
    return InlineKeyboardMarkup([row])
+
+
async def on_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the ✅ / ⏰ / 🗑️ buttons attached to a reminder notification.

    Callback data has the form ``rem:<action>:<id>``. Only the configured
    owner may use the buttons. Every path answers the callback query:

    * ``done``   marks the reminder done;
    * ``snooze`` moves the due time to one hour from now;
    * ``del``    soft-deletes the reminder and logs the operation;
    * anything else, including malformed data, is acknowledged silently.

    Callback data arrives from the client, so its shape is validated before
    ``int()`` is applied: a non-numeric id used to raise ValueError and leave
    the query unanswered. The original message text is read defensively
    because the message attached to a callback query may be missing.
    """
    cfg: Config = context.bot_data["cfg"]
    butler: Butler = context.bot_data["butler"]
    query = update.callback_query
    user = update.effective_user

    if cfg.authorized_user_id is None or not user or user.id != cfg.authorized_user_id:
        await query.answer("No autorizado")
        return

    parts = (query.data or "").split(":")
    well_formed = (
        len(parts) == 3
        and parts[0] == "rem"
        and parts[2].isascii()
        and parts[2].isdigit()
    )
    if not well_formed:
        await query.answer()
        return
    _, action, sid = parts
    rid = int(sid)
    original = getattr(query.message, "text", None) or ""

    if action == "done":
        ok = butler.mark_reminder_done(rid)
        await query.answer("✅ Hecho" if ok else "Ya no existe")
        if ok:
            await query.edit_message_text(f"✅ Hecho:\n{original}")
    elif action == "snooze":
        due = datetime.now(timezone.utc) + timedelta(hours=1)
        ok = butler.db.update_reminder(rid, due_at_utc=due, clear_notified=True)
        await query.answer("⏰ Pospuesto 1 hora" if ok else "Ya no existe")
        if ok:
            local = due.astimezone(cfg.tz).strftime("%H:%M")
            await query.edit_message_text(f"⏰ Pospuesto hasta las {local}:\n{original}")
    elif action == "del":
        ok = butler.db.soft_delete_reminder(rid)
        if ok:
            butler.db.log_op("delete_reminder", rid, f"recordatorio #{rid}")
        await query.answer("🗑️ Borrado" if ok else "Ya no existe")
        if ok:
            await query.edit_message_text(f"🗑️ Borrado:\n{original}")
    else:
        await query.answer()
+
+
def build_application(cfg: Config, butler: Butler, transcriber: Transcriber) -> Application:
    """Create the Telegram application and register every handler.

    The config, butler and transcriber are stored in ``bot_data`` so handlers
    can look them up. Handlers are added in a fixed order: commands (Spanish
    name first, English alias second), the reminder-button callback, then
    voice/audio messages, then plain non-command text.
    """
    app = Application.builder().token(cfg.telegram_token).build()
    for key, value in (("cfg", cfg), ("butler", butler), ("transcriber", transcriber)):
        app.bot_data[key] = value

    command_table = (
        ("start", cmd_start),
        ("id", cmd_id),
        (["recordatorios", "reminders"], cmd_reminders),
        (["notas", "notes"], cmd_notes),
        (["temas", "topics"], cmd_topics),
        (["tema", "topic"], cmd_tag),
        (["exportar", "export"], cmd_export),
        (["buscar", "search"], cmd_search),
        (["resumen", "summary"], cmd_summary),
        (["hecho", "done"], cmd_done),
        (["deshacer", "undo"], cmd_undo),
        (["borrar", "delete"], cmd_delete),
        (["editar", "edit"], cmd_edit),
        (["contexto", "mover", "context", "move"], cmd_context),
    )
    for names, callback in command_table:
        app.add_handler(CommandHandler(names, callback))

    app.add_handler(CallbackQueryHandler(on_callback, pattern=r"^rem:"))
    app.add_handler(MessageHandler(filters.VOICE | filters.AUDIO, on_voice))
    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, on_text))
    return app
--- /dev/null
+"""Configuration loaded from environment variables (and an optional .env file).
+
+Every setting has a sane default for the Raspberry Pi deployment; the local
+test setup overrides the path-related ones via its own .env.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from zoneinfo import ZoneInfo
+
+try:
+ from dotenv import load_dotenv
+except ImportError: # dotenv is optional at runtime
+ def load_dotenv(*_args, **_kwargs): # type: ignore
+ return False
+
+
def _env(name: str, default: str | None = None) -> str | None:
    """Read an environment variable, trimmed, falling back to ``default``.

    A variable that is unset, empty or whitespace-only yields ``default``
    (returned as given, untrimmed, in the empty case).
    """
    candidate = os.environ.get(name, default)
    if candidate is None:
        return default
    trimmed = candidate.strip()
    return trimmed if trimmed else default
+
+
def _env_int(name: str, default: int | None) -> int | None:
    """Read an integer environment variable.

    Returns ``default`` when the variable is unset, blank, or not a valid
    integer literal.
    """
    raw = os.environ.get(name)
    trimmed = raw.strip() if raw is not None else ""
    if not trimmed:
        return default
    try:
        parsed = int(trimmed)
    except ValueError:
        return default
    return parsed
+
+
@dataclass
class Config:
    """Runtime settings, normally built from the environment via ``load()``."""

    # --- Telegram ---
    telegram_token: str
    authorized_user_id: int | None  # if None, bot runs in "tell me your id" mode

    # --- ollama / models ---
    ollama_host: str
    llm_model: str
    embed_model: str
    use_embeddings: bool

    # --- whisper.cpp ---
    whisper_bin: str
    whisper_model: str
    whisper_lang: str

    # --- storage ---
    db_path: str
    data_dir: str

    # --- HTTP morning endpoint ---
    http_host: str
    http_port: int
    morning_token: str | None

    # --- misc ---
    tz: ZoneInfo = field(default_factory=lambda: ZoneInfo("Europe/Madrid"))
    reminder_poll_seconds: int = 30
    # Named contexts/locations; anything not matching one of these is "general".
    contexts: list[str] = field(default_factory=lambda: ["burgos", "madrid"])

    @classmethod
    def load(cls, env_file: str | os.PathLike | None = None) -> "Config":
        """Build a Config from environment variables.

        ``env_file`` (or, when omitted, a ``.env`` found by python-dotenv) is
        loaded first without overriding variables that are already set.

        Raises:
            SystemExit: if TELEGRAM_BOT_TOKEN is missing or blank.

        Fallbacks: an unknown TZ name becomes UTC; an empty CONTEXTS list
        becomes the default contexts; non-numeric integer settings use their
        defaults. REMINDER_POLL_SECONDS is clamped to at least 1 so a zero or
        negative value cannot reach the scheduler as its poll interval.
        """
        if env_file:
            load_dotenv(env_file, override=False)
        else:
            # Load a .env from CWD if present; harmless if absent.
            load_dotenv(override=False)

        token = _env("TELEGRAM_BOT_TOKEN")
        if not token:
            raise SystemExit(
                "TELEGRAM_BOT_TOKEN is not set. Copy .env.example to .env and fill it in."
            )

        data_dir = _env("DATA_DIR", str(Path.home() / ".local/share/segismundo"))
        db_path = _env("DB_PATH", str(Path(data_dir) / "segismundo.db"))

        tz_name = _env("TZ", "Europe/Madrid")
        try:
            tz = ZoneInfo(tz_name)
        except Exception:
            # Deliberately broad: ZoneInfo can fail in several ways for a bad
            # key, and a wrong TZ must not prevent start-up.
            tz = ZoneInfo("UTC")

        contexts = [c.strip().lower() for c in _env("CONTEXTS", "burgos,madrid").split(",")
                    if c.strip()]

        poll_seconds = max(1, _env_int("REMINDER_POLL_SECONDS", 30))

        return cls(
            telegram_token=token,
            authorized_user_id=_env_int("AUTHORIZED_USER_ID", None),
            ollama_host=_env("OLLAMA_HOST", "http://127.0.0.1:11434"),
            llm_model=_env("LLM_MODEL", "qwen2.5:3b"),
            embed_model=_env("EMBED_MODEL", "nomic-embed-text"),
            use_embeddings=_env("USE_EMBEDDINGS", "true").lower() in ("1", "true", "yes"),
            whisper_bin=_env("WHISPER_BIN", "whisper-cli"),
            whisper_model=_env("WHISPER_MODEL", ""),
            whisper_lang=_env("WHISPER_LANG", "es"),
            db_path=db_path,
            data_dir=data_dir,
            http_host=_env("HTTP_HOST", "0.0.0.0"),
            http_port=_env_int("HTTP_PORT", 8081),
            morning_token=_env("MORNING_TOKEN", None),
            tz=tz,
            reminder_poll_seconds=poll_seconds,
            contexts=contexts or ["burgos", "madrid"],
        )
--- /dev/null
+"""The butler's brain: turn an incoming message into an action and a reply.
+
+This module is deliberately transport-agnostic — it knows nothing about
+Telegram. `handle_message` takes text and returns a string reply, so it can be
+unit-tested directly (see tests/test_core.py) and reused by the HTTP layer.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from datetime import datetime, timezone
+
+from .db import Database
+from .llm import LLM
+from .timeparse import parse_date_range, parse_when
+
# Heuristic for "this is a question": a question mark anywhere, or a Spanish
# interrogative word (accented or not) opening the message. Used to correct the
# occasional small-model misclassification of a question as a note.
_INTERROGATIVE_RE = re.compile(
    r"(\?|¿)|^\s*(qu[ée]|cu[áa]l(es)?|cu[áa]ndo|d[óo]nde|ad[óo]nde|qui[ée]n(es)?|"
    r"c[óo]mo|cu[áa]nto?s?|cu[áa]nta?s?|por\s+qu[ée]|para\s+qu[ée])\b",
    flags=re.IGNORECASE,
)


def _looks_like_question(text: str) -> bool:
    """Return True when the trimmed text matches ``_INTERROGATIVE_RE``."""
    trimmed = text.strip()
    return _INTERROGATIVE_RE.search(trimmed) is not None
+
+
# Nouns that mark a genuine "show me my list" request, as opposed to a specific
# question that the model mislabelled as a listing.
_LIST_WORDS_RE = re.compile(
    r"\b(notas?|recordatorios?|pendientes?|tareas?|lista)\b",
    flags=re.IGNORECASE,
)
+
# --- Delete / undo vocabulary. These intents are matched with regexes rather
# than left to the small model, which tends to misclassify "borra el
# recordatorio 1" as a note. ---

# Delete verbs. The negative lookahead keeps "borrador"/"borradores" (draft)
# from counting as "borra…".
_DELETE_VERB_RE = re.compile(
    r"\b(b[oó]rra(?!dor)\w*|elimina\w*|quita\w*|suprim\w*|descarta\w*|c[aá]ncela\w*)\b",
    flags=re.IGNORECASE,
)

# Undo phrases ("deshaz", "vuelve atrás", "me equivoqué", …).
_UNDO_RE = re.compile(
    r"\b(deshaz|deshacer|undo|rev(?:ierte|ertir)|vuelve\s+atr[áa]s|"
    r"anula\s+(?:eso|lo\s+[úu]ltimo|la\s+[úu]ltima)|"
    r"me\s+(?:he\s+)?equivoqu[ée]|no\s+era\s+eso|c[aá]ncela\s+eso)\b",
    flags=re.IGNORECASE,
)

# Words naming the target kind: a reminder or a note.
_REMINDER_WORD_RE = re.compile(
    r"\b(recordatorios?|avisos?|alarmas?|recu[eé]rd\w*)\b",
    flags=re.IGNORECASE,
)
_NOTE_WORD_RE = re.compile(r"\b(notas?|apuntes?|memos?)\b", flags=re.IGNORECASE)

# "último/última/últimos/recientes": the most recent item.
_LAST_RE = re.compile(r"\b([úu]ltim[oa]s?|recient\w*)\b", flags=re.IGNORECASE)

# A message opening with "recuérdame…"/"avísame…" asks for a reminder; it must
# not be treated as a delete even if it contains a delete verb.
_REMINDER_PREFIX_RE = re.compile(r"^\s*(recu[eé]rda\w*|av[ií]sa\w*)", flags=re.IGNORECASE)
+
# Spanish number words accepted as item ids (1-12).
_ID_WORDS = {
    "uno": 1, "una": 1, "dos": 2, "tres": 3, "cuatro": 4, "cinco": 5, "seis": 6,
    "siete": 7, "ocho": 8, "nueve": 9, "diez": 10, "once": 11, "doce": 12,
}

# One alternation over all number words, so the match is decided by position in
# the text rather than by the dict's key order.
_ID_WORD_RE = re.compile(
    r"\b(" + "|".join(re.escape(word) for word in _ID_WORDS) + r")\b",
    re.IGNORECASE,
)


def _extract_id(text: str) -> int | None:
    """Return the item id mentioned in ``text``, or None.

    The first standalone number of 1-6 digits wins. Failing that, the first
    number word (uno…doce) *in text order* is used. The earlier dict-order scan
    returned 2 for "la nota tres a dos cosas"; it now returns 3, matching how
    digits are handled.
    """
    digits = re.search(r"\b(\d{1,6})\b", text)
    if digits:
        return int(digits.group(1))
    word = _ID_WORD_RE.search(text)
    if word:
        # casefold() keeps the lookup safe for any case-insensitive match.
        return _ID_WORDS.get(word.group(1).casefold())
    return None
+
+
def _is_delete_request(text: str) -> bool:
    """Tell whether ``text`` asks to delete an existing note or reminder.

    Requires a delete verb plus a note/reminder word, and rejects messages
    that open like a note to save, a note-creation wrapper, or a reminder
    request.
    """
    if not _DELETE_VERB_RE.search(text):
        return False
    if not (_REMINDER_WORD_RE.search(text) or _NOTE_WORD_RE.search(text)):
        return False
    openers = (_NOTE_PREFIX_RE, _NOTE_CREATE_RE, _REMINDER_PREFIX_RE)
    return not any(opener.match(text) for opener in openers)
+
+
# --- Edit / append vocabulary (regex-matched for the same reason as delete). ---

# Any verb that changes an existing item.
_EDIT_VERB_RE = re.compile(
    r"\b(a[ñn][aá]de\w*|agr[eé]ga\w*|cambia\w*|modifica\w*|edita\w*|"
    r"actualiza\w*|corrige|rectifica\w*|renombra\w*)\b",
    flags=re.IGNORECASE,
)
# The subset of edit verbs that mean "append".
_APPEND_VERB_RE = re.compile(r"\b(a[ñn][aá]de\w*|agr[eé]ga\w*)\b", flags=re.IGNORECASE)
# Leading connector word ("que", "a", "al", "por", "con") before the payload.
_CONNECTOR_RE = re.compile(r"^(?:que|a|al|por|con)\b\s*", flags=re.IGNORECASE)
# Leading whitespace and punctuation before the payload.
_PUNCT_RE = re.compile(r"^[\s:,.\-–]+")


def _is_edit_request(text: str) -> bool:
    """Tell whether ``text`` asks to edit an existing note or reminder.

    Requires an edit verb plus a note/reminder word, and rejects messages
    that open like a note to save, a note-creation wrapper, or a reminder
    request.
    """
    if not _EDIT_VERB_RE.search(text):
        return False
    if not (_REMINDER_WORD_RE.search(text) or _NOTE_WORD_RE.search(text)):
        return False
    openers = (_NOTE_PREFIX_RE, _NOTE_CREATE_RE, _REMINDER_PREFIX_RE)
    return not any(opener.match(text) for opener in openers)
+
+
# --- Move to another context ("mueve la nota 3 a Madrid"). Checked before edit
# so "cambia el contexto de la nota 3 a Madrid" re-files instead of editing. ---
_MOVE_VERB_RE = re.compile(
    r"\b(mueve\w*|mover|traslada\w*|reasigna\w*|recoloca\w*|reub[íi]ca\w*)\b",
    flags=re.IGNORECASE,
)
_CONTEXT_WORD_RE = re.compile(r"\bcontexto\b", flags=re.IGNORECASE)


def _is_move_request(text: str) -> bool:
    """Tell whether ``text`` asks to re-file a note or reminder.

    Requires a move verb or the word "contexto", plus a note/reminder word,
    and rejects messages that open like a note to save, a note-creation
    wrapper, or a reminder request.
    """
    if not (_MOVE_VERB_RE.search(text) or _CONTEXT_WORD_RE.search(text)):
        return False
    if not (_REMINDER_WORD_RE.search(text) or _NOTE_WORD_RE.search(text)):
        return False
    openers = (_NOTE_PREFIX_RE, _NOTE_CREATE_RE, _REMINDER_PREFIX_RE)
    return not any(opener.match(text) for opener in openers)
+
+
def _edit_payload(text: str) -> str:
    """Return the new content/time that follows the target reference.

    The payload is everything after the *first* id number, else after the
    first target word (nota, recordatorio, …), else after the first "último".
    Leading punctuation and one connector word (que/a/al/por/con) are trimmed.
    Returns "" when none of the anchors is present.

    The first occurrence is used because the reference to the item comes
    before the new content, and ``_extract_id`` likewise treats the first
    number as the id. Anchoring on the last occurrence dropped content that
    itself contained a number or a target word: "cambia el recordatorio 2 a
    las 10" produced an empty payload.
    """
    anchors = (
        r"\b\d{1,6}\b",
        r"\b(?:notas?|apuntes?|recordatorios?|avisos?|alarmas?)\b",
        r"\b[úu]ltim[oa]s?\b",
    )
    for pattern in anchors:
        anchor = re.search(pattern, text, re.IGNORECASE)
        if anchor:
            rest = text[anchor.end():]
            rest = _PUNCT_RE.sub("", rest.strip())
            rest = _CONNECTOR_RE.sub("", rest)
            return _PUNCT_RE.sub("", rest).strip()
    return ""
+
+
# Leading "save this" verbs that are trimmed so a note reads cleanly, without
# an LLM rewrite (which would risk paraphrasing away content).
_NOTE_PREFIX_RE = re.compile(
    r"^\s*(?:oye\s+|vale\s+)?"
    r"(?:ap[úu]nta(?:me)?|anota(?:me)?|gu[áa]rda(?:me)?|recuerda|nota)\b"
    r"[:,]?\s*(?:(?:de\s+)?que\s+|lo\s+siguiente[:,]?\s*)?",
    flags=re.IGNORECASE,
)

# A note-CREATION wrapper, as opposed to an *edit* of an existing note. It
# covers "añade/agrega una nota que diga (que) …", "añade una nota: …", and the
# common Whisper mishearing where the leading verb is garbled but "una nota que
# diga que …" survives ("allá de una nota que diga que …"). Two safe shapes:
#   • <article> nota que diga|ponga …   (the "a note that says …" form)
#   • una|otra nota[:,] …               (an explicit *indefinite* note)
# An edit such as "añade a la nota 3 …" matches neither: it has a number right
# after the noun and the definite "a la". That is why this pattern is consulted
# before treating a message as an edit or delete, which would otherwise mistake
# "añade una nota …" for an append to note #N.
_NOTE_CREATE_RE = re.compile(
    r"^\s*(?:oye\s+|vale\s+)?"
    r"(?:\S+\s+){0,2}"  # 0-2 leading words (verb, maybe garbled)
    r"(?:"
    r"(?:una|la|otra|el)\s+(?:nota|apunte)s?\s+"
    r"que\s+(?:diga|ponga|digan|pongan|pon)\w*\s+(?:que\s+)?"
    r"|"
    r"(?:una|otra)\s+(?:nota|apunte)s?\s*[:,]\s*"
    r")",
    flags=re.IGNORECASE,
)

# Above this many characters a message counts as a memo ("tape recorder"): it
# is kept verbatim as a note even if it mentions things to do, rather than
# being squeezed into a paraphrased reminder and losing content.
_LONG_NOTE_CHARS = 220


def _clean_note(text: str) -> str:
    """Strip a leading note-creation wrapper or "save this" verb from ``text``.

    The creation wrapper is tried first and the plain prefix only if the
    wrapper is absent. If stripping would leave nothing, the trimmed original
    text is returned instead.
    """
    # Both patterns are anchored at the start, so at most one leading span goes.
    lead = _NOTE_CREATE_RE.match(text) or _NOTE_PREFIX_RE.match(text)
    remainder = text[lead.end():] if lead else text
    remainder = remainder.strip()
    if remainder:
        return remainder
    return text.strip()
+
+
def _fmt_local(iso_utc: str, tz) -> str:
    """Format an ISO-8601 timestamp as ``dd/mm HH:MM`` in time zone ``tz``."""
    moment = datetime.fromisoformat(iso_utc)
    return f"{moment.astimezone(tz):%d/%m %H:%M}"
+
+
def _detect_context(text: str, contexts) -> str | None:
    """Return the first context in ``contexts`` that appears as a whole word.

    The text is lower-cased before matching; each context is matched as
    given. ``None`` means no context was mentioned (i.e. "general").
    """
    haystack = text.lower()
    return next(
        (name for name in contexts
         if re.search(rf"\b{re.escape(name)}\b", haystack)),
        None,
    )
+
+
def _ctx_label(context: str | None) -> str:
    """Return a " 📍Name" suffix for a named context, or "" for none/general."""
    if not context or context == "general":
        return ""
    return f" 📍{context.capitalize()}"
+
+
+class Butler:
+ def __init__(self, db: Database, llm: LLM, tz, contexts=("burgos", "madrid")):
+ self.db = db
+ self.llm = llm
+ self.tz = tz
+ self.contexts = [c.lower() for c in contexts]
+
+ async def handle_message(self, text: str, source: str = "text") -> str:
+ text = (text or "").strip()
+ if not text:
+ return "No he recibido nada que procesar."
+
+ # Deterministic, high-priority intents the LLM gets wrong. Handled before
+ # classification so a delete/undo is never mistakenly stored as a note.
+ if _UNDO_RE.search(text):
+ return await self.undo_last()
+ # Move-to-context is checked before edit so "cambia el contexto de la nota
+ # 3 a Madrid" re-files instead of overwriting the note's text.
+ if _is_move_request(text):
+ return self.handle_move(text)
+ # Edit is checked before delete: "añade … borrador" should append, not
+ # delete (the word "borrador" merely contains "borra").
+ if _is_edit_request(text):
+ return await self.handle_edit(text)
+ if _is_delete_request(text):
+ return self.handle_delete(text)
+
+ intent = await self.llm.classify(text)
+
+ # Safety net: a clearly interrogative message that the model filed as a
+ # note is almost always a question about existing notes.
+ if intent.action == "note" and _looks_like_question(text):
+ intent.action = "question"
+ intent.query = intent.note_text or text
+
+ # A specific question mislabelled as a listing ("¿dónde aparqué el
+ # coche?") should search the notes, unless it actually asks to see the
+ # notes/reminders list ("¿qué notas tengo?").
+ if (intent.action in ("list_notes", "list_reminders")
+ and _looks_like_question(text)
+ and not _LIST_WORDS_RE.search(text)):
+ intent.action = "question"
+ intent.query = text
+
+ # Memo safety net: a long brain-dump is a note to keep verbatim, never a
+ # paraphrased reminder. Questions are exempt (handled above).
+ if len(text) > _LONG_NOTE_CHARS and intent.action in ("note", "reminder", "chitchat"):
+ intent.action = "note"
+
+ # Which context (Burgos/Madrid/…) is mentioned? None means "general" on
+ # write, and "no filter / all contexts" on read.
+ ctx = _detect_context(text, self.contexts)
+
+ # Date-scoped recall: "¿qué grabé ayer?", "mis notas de la semana pasada".
+ if intent.action in ("question", "list_notes"):
+ rng = parse_date_range(text, self.tz)
+ if rng:
+ return await self._recall_range(text, rng[0], rng[1], context=ctx)
+
+ if intent.action == "note":
+ # Store the ORIGINAL text verbatim (lightly de-prefixed), so nothing
+ # the user said is ever lost to an LLM rewrite.
+ return await self._save_note(_clean_note(text), source, context=ctx or "general")
+
+ if intent.action == "reminder":
+ return await self._create_reminder(intent.reminder_text or text, intent.when,
+ context=ctx or "general")
+
+ if intent.action == "question":
+ return await self._answer_question(intent.query or text, context=ctx)
+
+ if intent.action == "list_reminders":
+ return self.list_reminders(context=ctx)
+
+ if intent.action == "list_notes":
+ return self.list_notes(context=ctx)
+
+ if intent.action == "summary":
+ return self.morning_summary_data()[0]
+
+ # chitchat / fallback: hold a real (brief) conversation.
+ try:
+ return await self.llm.chat(text)
+ except Exception:
+ return intent.reply or "Aquí estoy, a tu servicio. ✅"
+
+ # ---------------- actions ----------------
+
async def _save_note(self, content: str, source: str,
                     context: str = "general") -> str:
    """Persist *content* as a note and return the confirmation message.

    The note is embedded and tagged through the LLM client, stored under
    *context*, and a ``create_note`` op is logged so the save can be undone.
    The confirmation shows at most the first 200 characters of the note.
    """
    vector = await self.llm.embed(content)
    topic_tags = await self.llm.suggest_tags(content)
    new_id = self.db.add_note(
        content,
        source=source,
        embedding=vector,
        tags=" ".join(topic_tags),
        context=context,
    )
    self.db.log_op("create_note", new_id, f"nota #{new_id}")

    if len(content) > 200:
        preview = content[:200].rstrip() + "…"
    else:
        preview = content

    if topic_tags:
        hashtags = " ".join("#" + t for t in topic_tags)
        tagline = f"\n🏷️ {hashtags}"
    else:
        tagline = ""

    return (f"📝 Nota guardada (#{new_id}){_ctx_label(context)}:\n"
            f"{preview}{tagline}\n(«deshaz» para anular)")
+
async def _recall_range(self, query: str, start: datetime, end: datetime,
                        context: str | None = None) -> str:
    """Answer *query* using only the notes created between *start* and *end*.

    When *context* is given, only notes filed under it are considered. If no
    note falls in the range, a fixed "nothing saved" message is returned and
    the LLM is not consulted.
    """
    found = self.db.notes_in_range(start, end, context=context)
    if found:
        return await self.llm.answer(query, [note.content for note in found])
    scope = _ctx_label(context).strip() or "ese periodo"
    return f"No tengo nada guardado de {scope}. 🤔"
+
async def _create_reminder(self, text: str, when: str,
                           context: str = "general") -> str:
    """Schedule a reminder for *text* at the time described by *when*.

    If *when* cannot be parsed, the text is stored as a note instead (source
    ``"reminder-fallback"``) so nothing is lost; that save logs its own
    undoable op. Otherwise the reminder is stored, a ``create_reminder`` op is
    logged, and the confirmation shows the due time in the user's timezone.
    """
    due = parse_when(when, self.tz)
    if due is not None:
        rid = self.db.add_reminder(text, due, context=context)
        self.db.log_op("create_reminder", rid, f"recordatorio #{rid}")
        when_local = due.astimezone(self.tz).strftime('%A %d/%m a las %H:%M')
        return (
            f"⏰ Recordatorio #{rid} programado para "
            f"{when_local}{_ctx_label(context)}:"
            f"\n{text}\n(«deshaz» para anular)"
        )

    # No usable date/time: fall back to a plain note.
    await self._save_note(text, "reminder-fallback", context=context)
    return (
        "⏰ No he entendido la fecha/hora del recordatorio, así que lo he "
        f"guardado como nota:\n{text}\n\n"
        "Prueba de nuevo con algo como «recuérdame mañana a las 9 …»."
    )
+
+ # ---------------- delete / undo ----------------
+
def mark_reminder_done(self, reminder_id: int) -> bool:
    """Mark a reminder as done; log an undoable op only when it succeeded.

    Returns whatever ``db.mark_done`` reported.
    """
    marked = self.db.mark_done(reminder_id)
    if not marked:
        return marked
    self.db.log_op("done_reminder", reminder_id, f"recordatorio #{reminder_id}")
    return marked
+
def handle_delete(self, text: str) -> str:
    """Soft-delete the note or reminder that *text* refers to.

    The target kind comes from the words in the message (reminder words win
    over note words); the id is either an explicit number or, when the message
    asks for "the last one", the most recent item of that kind. A successful
    delete logs an op so it can be undone. Returns the reply for the user.
    """
    if _REMINDER_WORD_RE.search(text):
        target = "reminder"
    elif _NOTE_WORD_RE.search(text):
        target = "note"
    else:
        target = None
    rid = _extract_id(text)
    wants_last = bool(_LAST_RE.search(text))

    if target is None:
        return ("¿Quieres borrar una nota o un recordatorio? Dime, p.ej., "
                "«borra el recordatorio 3» o usa /borrar.")

    is_reminder = target == "reminder"
    noun = "recordatorio" if is_reminder else "nota"

    if rid is None and wants_last:
        rid = self._last_id(target)
    if rid is None:
        listing = 'recordatorios' if is_reminder else 'notas'
        return (f"¿Cuál {noun}? Dime el número (mira /{listing}) "
                f"o «borra el último {noun}».")

    if is_reminder:
        removed = self.db.soft_delete_reminder(rid)
        if removed:
            self.db.log_op("delete_reminder", rid, f"recordatorio #{rid}")
    else:
        removed = self.db.soft_delete_note(rid)
        if removed:
            self.db.log_op("delete_note", rid, f"nota #{rid}")

    if not removed:
        article = 'el recordatorio' if is_reminder else 'la nota'
        return f"No encontré {article} #{rid}."
    ending = 'o' if is_reminder else 'a'
    return f"🗑️ {noun.capitalize()} #{rid} borrad{ending}. («deshaz» para recuperarlo.)"
+
def handle_move(self, text: str) -> str:
    """Re-file a note or reminder under another context.

    The destination is one of the configured contexts mentioned in *text*, or
    "general" when that word appears. The previous context is saved in the op
    log so the move can be undone. Returns the reply for the user.
    """
    if _REMINDER_WORD_RE.search(text):
        target = "reminder"
    elif _NOTE_WORD_RE.search(text):
        target = "note"
    else:
        return "¿Quieres mover una nota o un recordatorio? Usa /contexto."
    is_reminder = target == "reminder"

    rid = _extract_id(text)
    if rid is None and _LAST_RE.search(text):
        rid = self._last_id(target)
    noun = "recordatorio" if is_reminder else "nota"
    if rid is None:
        return f"¿Cuál {noun}? Dime el número."

    # Destination context: a configured name, or "general".
    dest = _detect_context(text, self.contexts)
    if dest is None and re.search(r"\bgeneral\b", text, re.IGNORECASE):
        dest = "general"
    if dest is None:
        opciones = ", ".join(self.contexts + ["general"])
        return f"¿A qué contexto? Opciones: {opciones}."

    # Read the current context first so undo knows where to send it back.
    if is_reminder:
        current = self.db.get_reminder(rid)
        previous = current.context if current else None
        moved = self.db.set_reminder_context(rid, dest)
        kind = "move_reminder"
    else:
        current = self.db.get_note(rid)
        previous = current.context if current else None
        moved = self.db.set_note_context(rid, dest)
        kind = "move_note"

    if not moved:
        article = 'el recordatorio' if is_reminder else 'la nota'
        return f"No encontré {article} #{rid}."

    self.db.log_op(kind, rid, f"{noun} #{rid}",
                   json.dumps({"context": previous or "general"}))
    ending = 'o' if is_reminder else 'a'
    return (f"📍 {noun.capitalize()} #{rid} movid{ending} a "
            f"{dest.capitalize()}. («deshaz» para revertir)")
+
def _last_id(self, target: str) -> int | None:
    """Return the id meant by "the last one" for *target*, or None if there is none.

    For ``"note"`` this is the first row of ``recent_notes(limit=1)``; for
    anything else it is the highest id among the pending reminders.
    """
    if target != "note":
        pending = self.db.pending_reminders()
        return max((rem.id for rem in pending), default=None)
    newest = self.db.recent_notes(limit=1)
    if not newest:
        return None
    return newest[0].id
+
async def handle_edit(self, text: str) -> str:
    """Parse an edit request and dispatch it to the note or reminder editor.

    Works out the target kind, the id (an explicit number or "the last one"),
    whether the user wants to append or replace, and the new text. Returns a
    clarifying question whenever one of those pieces is missing.
    """
    if _REMINDER_WORD_RE.search(text):
        target = "reminder"
    elif _NOTE_WORD_RE.search(text):
        target = "note"
    else:
        return "¿Quieres editar una nota o un recordatorio? Usa /editar o dime el número."
    is_reminder = target == "reminder"

    rid = _extract_id(text)
    if rid is None and _LAST_RE.search(text):
        rid = self._last_id(target)
    noun = "recordatorio" if is_reminder else "nota"
    if rid is None:
        listing = 'recordatorios' if is_reminder else 'notas'
        return f"¿Cuál {noun}? Dime el número (mira /{listing})."

    append = bool(_APPEND_VERB_RE.search(text))
    payload = _edit_payload(text)
    if not payload:
        verb = 'añadir' if append else 'poner'
        return f"¿Qué quieres {verb}? Escríbelo después del número."

    if is_reminder:
        return await self._edit_reminder(rid, payload, append)
    return await self._edit_note(rid, payload, append)
+
async def _edit_note(self, note_id: int, payload: str, append: bool) -> str:
    """Append *payload* to a note, or replace its text with it.

    The new text is re-embedded and re-tagged. The previous content and tags
    are saved as a JSON snapshot in the op log so the edit can be reverted.
    """
    existing = self.db.get_note(note_id)
    if existing is None:
        return f"No encontré la nota #{note_id}."

    snapshot = json.dumps({"content": existing.content, "tags": existing.tags})
    if append:
        updated_text = f"{existing.content}\n{payload}"
    else:
        updated_text = payload

    vector = await self.llm.embed(updated_text)
    topic_tags = await self.llm.suggest_tags(updated_text)
    self.db.update_note(note_id, updated_text, vector, " ".join(topic_tags))
    self.db.log_op("edit_note", note_id, f"nota #{note_id}", snapshot)

    if len(updated_text) > 200:
        preview = updated_text[:200].rstrip() + "…"
    else:
        preview = updated_text
    verb = "ampliada" if append else "actualizada"
    return f"✏️ Nota #{note_id} {verb}:\n{preview}\n(«deshaz» para revertir)"
+
async def _edit_reminder(self, rid: int, payload: str, append: bool) -> str:
    """Append to, reschedule, or rewrite a reminder.

    In append mode *payload* is added to the reminder text. Otherwise, if
    *payload* parses as a date/time the reminder is rescheduled (and its
    ``notified`` flag cleared); if it does not parse, it replaces the text.
    The previous text and due time are saved in the op log for undo.
    """
    current = self.db.get_reminder(rid)
    if current is None:
        return f"No encontré el recordatorio #{rid}."

    snapshot = json.dumps({"text": current.text, "due_at": current.due_at})

    if append:
        self.db.update_reminder(rid, text=f"{current.text} {payload}")
        change = "ampliado"
    else:
        new_due = parse_when(payload, self.tz)
        if new_due is None:
            self.db.update_reminder(rid, text=payload)
            change = "actualizado"
        else:
            self.db.update_reminder(rid, due_at_utc=new_due, clear_notified=True)
            when_local = new_due.astimezone(self.tz).strftime("%A %d/%m a las %H:%M")
            change = "reprogramado para " + when_local

    self.db.log_op("edit_reminder", rid, f"recordatorio #{rid}", snapshot)
    return f"✏️ Recordatorio #{rid} {change}.\n(«deshaz» para revertir)"
+
async def undo_last(self) -> str:
    """Reverse the most recent operation that has not been undone yet.

    Creations are soft-deleted, deletions restored, "done" flags cleared, and
    edits/moves rolled back from the JSON snapshot stored with the op.

    An edit whose snapshot is missing or unreadable is NOT applied: restoring
    from an empty snapshot would overwrite the note with an empty string. In
    that case, as for an unknown op kind, an explanatory message is returned
    and the op is left unmarked.

    Returns the confirmation (or explanation) to show the user.
    """
    op = self.db.last_op()
    if op is None:
        return "No hay nada que deshacer. 🤷"

    # Snapshot of the prior state; only edit/move ops carry one.
    try:
        data = json.loads(op.undo_data or "{}")
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}
    missing_snapshot = ("No puedo deshacer la última operación: "
                        "falta el estado anterior.")

    if op.kind == "done_reminder":
        self.db.set_done(op.target_id, False)
    elif op.kind == "create_note":
        self.db.soft_delete_note(op.target_id)
    elif op.kind == "create_reminder":
        self.db.soft_delete_reminder(op.target_id)
    elif op.kind == "delete_note":
        self.db.restore_note(op.target_id)
    elif op.kind == "delete_reminder":
        self.db.restore_reminder(op.target_id)
    elif op.kind == "edit_note":
        if "content" not in data:
            return missing_snapshot
        # The embedding is not part of the snapshot, so recompute it.
        emb = await self.llm.embed(data["content"])
        self.db.update_note(op.target_id, data["content"], emb,
                            data.get("tags", ""))
    elif op.kind == "edit_reminder":
        if data.get("text") is None and not data.get("due_at"):
            return missing_snapshot
        due = datetime.fromisoformat(data["due_at"]) if data.get("due_at") else None
        self.db.update_reminder(op.target_id, text=data.get("text"),
                                due_at_utc=due, clear_notified=True)
    elif op.kind == "move_note":
        self.db.set_note_context(op.target_id, data.get("context", "general"))
    elif op.kind == "move_reminder":
        self.db.set_reminder_context(op.target_id, data.get("context", "general"))
    else:
        return "No sé cómo deshacer la última operación."

    self.db.mark_op_undone(op.id)
    verbs = {
        "create_note": "he eliminado la nota que acababa de guardar",
        "create_reminder": "he eliminado el recordatorio que acababa de crear",
        "delete_note": f"he restaurado la {op.summary}",
        "delete_reminder": f"he restaurado el {op.summary}",
        "done_reminder": f"he reactivado el {op.summary}",
        "edit_note": f"he revertido los cambios en la {op.summary}",
        "edit_reminder": f"he revertido los cambios en el {op.summary}",
        "move_note": f"he devuelto la {op.summary} a su contexto anterior",
        "move_reminder": f"he devuelto el {op.summary} a su contexto anterior",
    }
    return f"↩️ Hecho: {verbs.get(op.kind, 'operación deshecha')}."
+
async def _answer_question(self, query: str, context: str | None = None) -> str:
    """Answer *query* from the stored notes.

    Embeds the query, fetches up to six notes through the hybrid search
    (optionally restricted to *context*) and lets the LLM answer from them.
    """
    query_vec = await self.llm.embed(query)
    hits = self.db.search_hybrid(query, query_vec, limit=6, context=context)
    return await self.llm.answer(query, [hit.content for hit in hits])
+
+ # ---------------- listings ----------------
+
def list_reminders(self, context: str | None = None) -> str:
    """Render the pending reminders as a text listing.

    With *context* the list is restricted to that context and named in the
    header; without it, each line carries its own context label instead.
    """
    pending = self.db.pending_reminders(context=context)
    scope = _ctx_label(context).strip()
    if not pending:
        suffix = f" de {scope}" if scope else ""
        return f"No tienes recordatorios pendientes{suffix}. ✅"

    title_scope = (' · ' + scope) if scope else ''
    rows = [f"⏰ Recordatorios pendientes{title_scope}:"]
    # Per-item labels are redundant when the whole list is already filtered.
    show_label = context is None
    for rem in pending:
        label = _ctx_label(rem.context) if show_label else ""
        rows.append(f" #{rem.id} · {_fmt_local(rem.due_at, self.tz)} · {rem.text}{label}")
    return "\n".join(rows)
+
def _note_line(self, n) -> str:
    """Format one note as a single listing line: id, date, context, preview, tags.

    The note text is cut to 160 characters (plus an ellipsis) when longer.
    """
    if len(n.content) > 160:
        body = n.content[:160].rstrip() + "…"
    else:
        body = n.content
    if n.tags:
        hashtags = " ".join("#" + t for t in n.tags.split())
        tags = f" 🏷️ {hashtags}"
    else:
        tags = ""
    label = _ctx_label(n.context)
    return f" #{n.id} · {_fmt_local(n.created_at, self.tz)}{label} · {body}{tags}"
+
def list_notes(self, limit: int = 10, context: str | None = None) -> str:
    """Render the most recent notes (at most *limit*) as a text listing.

    The header names the context when one is given; otherwise it states how
    many notes are shown.
    """
    notes = self.db.recent_notes(limit=limit, context=context)
    scope = _ctx_label(context).strip()
    if not notes:
        suffix = f" de {scope}" if scope else ""
        return f"Aún no has guardado ninguna nota{suffix}."

    if scope:
        qualifier = ' · ' + scope
    else:
        qualifier = f' (últimas {len(notes)})'
    rows = [f"📝 Notas recientes{qualifier}:"]
    rows.extend(self._note_line(note) for note in notes)
    return "\n".join(rows)
+
+ # ---------------- topics / browse ----------------
+
def list_topics(self) -> str:
    """List every tag with its note count, in the order the database returns them."""
    tag_counts = self.db.all_tags()
    if not tag_counts:
        return "Aún no hay temas. Guarda algunas notas y las etiquetaré por ti."
    rows = ["🏷️ Tus temas (usa /tema <nombre> para ver las notas):"]
    for name, total in tag_counts:
        rows.append(f" #{name} ({total})")
    return "\n".join(rows)
+
def notes_for_tag(self, tag: str) -> str:
    """List the notes carrying *tag*.

    The tag is normalised first: surrounding whitespace is removed *before*
    the leading ``#`` (so ``" #viajes"`` works), and it is lower-cased. An
    empty tag gets a usage hint instead of a database query, because an empty
    search pattern would match every untagged note.
    """
    tag = tag.strip().lstrip("#").strip().lower()
    if not tag:
        return "¿Qué tema? Usa /tema <nombre>."
    notes = self.db.notes_by_tag(tag)
    if not notes:
        return f"No tengo notas con el tema «{tag}»."
    lines = [f"🏷️ Notas de «{tag}»:"]
    lines.extend(self._note_line(n) for n in notes)
    return "\n".join(lines)
+
def export_markdown(self) -> str:
    """Return every note as one Markdown document, for /export.

    The document opens with a title and an export timestamp, followed by one
    ``##`` section per note: id, creation time in the user's timezone, an
    icon for voice vs. typed input, the tags, and then the full note text.
    """
    notes = self.db.all_notes()
    stamp = datetime.now(self.tz).strftime("%d/%m/%Y %H:%M")
    chunks = ["# Notas de Segismundo",
              f"_Exportado el {stamp} — {len(notes)} notas_\n"]
    for note in notes:
        created = datetime.fromisoformat(note.created_at).astimezone(self.tz)
        if note.tags:
            hashtags = ", ".join("#" + t for t in note.tags.split())
            tag_part = f" — _{hashtags}_"
        else:
            tag_part = ""
        icon = "🎙️" if note.source == "voice" else "⌨️"
        chunks.append(
            f"## #{note.id} · {created.strftime('%d/%m/%Y %H:%M')} {icon}{tag_part}\n")
        chunks.append(note.content + "\n")
    return "\n".join(chunks)
+
+ # ---------------- morning summary ----------------
+
def context_briefing(self, context: str) -> tuple[str, dict]:
    """Build an "I just arrived" briefing for one context.

    Returns ``(text, data)``: the human-readable briefing, and a dict with the
    same pending reminders (due times as ISO strings in the user's timezone)
    and the ten most recent notes of that context. Notes are cut to 160
    characters in the text only.
    """
    pending = self.db.pending_reminders(context=context)
    latest = self.db.recent_notes(limit=10, context=context)

    def local_due(rem):
        return datetime.fromisoformat(rem.due_at).astimezone(self.tz)

    def preview(note):
        if len(note.content) <= 160:
            return note.content
        return note.content[:160].rstrip() + '…'

    parts = [f"📍 {context.capitalize()} — esto es lo que tienes aquí:\n"]
    if pending:
        parts.append("⏰ Recordatorios:")
        for rem in pending:
            parts.append(f" · {local_due(rem).strftime('%d/%m %H:%M')} — {rem.text}")
    else:
        parts.append("⏰ Sin recordatorios pendientes.")
    if latest:
        parts.append("\n🗒️ Notas:")
        parts.extend(f" · {preview(note)}" for note in latest)

    reminder_items = []
    for rem in pending:
        reminder_items.append(
            {"id": rem.id, "due": local_due(rem).isoformat(), "text": rem.text})
    data = {
        "context": context,
        "reminders": reminder_items,
        "notes": [{"id": note.id, "content": note.content} for note in latest],
    }
    return "\n".join(parts), data
+
def morning_summary_data(self) -> tuple[str, dict]:
    """Build the morning briefing. Returns (human_text, machine_dict).

    Pending reminders are split into overdue, due today and upcoming, judged
    in the user's timezone. The text shows all overdue and today's reminders,
    the first five upcoming ones and the five most recent notes.

    Notes are cut to 160 characters in the text, like the other listings, so
    that a few long verbatim memos cannot push the message past the size a
    chat message may have. The dict always carries the full note content and
    every upcoming reminder.
    """
    preview_chars = 160
    now = datetime.now(self.tz)

    overdue, today, upcoming = [], [], []
    for r in self.db.pending_reminders():
        due_local = datetime.fromisoformat(r.due_at).astimezone(self.tz)
        if due_local < now:
            overdue.append((r, due_local))
        elif due_local.date() == now.date():
            today.append((r, due_local))
        else:
            upcoming.append((r, due_local))

    recent = self.db.recent_notes(limit=5)

    def reminder_lines(items, fmt):
        # One bullet per reminder: local due time, text, context label.
        return [f" · {dl.strftime(fmt)} — {r.text}{_ctx_label(r.context)}"
                for r, dl in items]

    def note_preview(content):
        if len(content) <= preview_chars:
            return content
        return content[:preview_chars].rstrip() + "…"

    # Human-readable text.
    parts = [f"☀️ Buenos días. Resumen del {now.strftime('%A %d/%m')}:\n"]
    if overdue:
        parts.append("🔴 Atrasados:")
        parts += reminder_lines(overdue, '%d/%m %H:%M')
    if today:
        parts.append("📅 Para hoy:")
        parts += reminder_lines(today, '%H:%M')
    if upcoming:
        parts.append("🔜 Próximos:")
        parts += reminder_lines(upcoming[:5], '%d/%m %H:%M')
    if not (overdue or today or upcoming):
        parts.append("No tienes recordatorios pendientes. 🎉")
    if recent:
        parts.append("\n🗒️ Últimas notas:")
        parts += [f" · {note_preview(n.content)}" for n in recent]

    text = "\n".join(parts)

    def reminder_items(items):
        return [{"id": r.id, "due": dl.isoformat(), "text": r.text,
                 "context": r.context} for r, dl in items]

    data = {
        "date": now.isoformat(),
        "overdue": reminder_items(overdue),
        "today": reminder_items(today),
        "upcoming": reminder_items(upcoming),
        "recent_notes": [{"id": n.id, "content": n.content, "context": n.context}
                         for n in recent],
    }
    return text, data
--- /dev/null
+"""SQLite storage: notes (with FTS5 + embeddings) and reminders.
+
+The database is intentionally simple — a single file, no migrations framework.
+Semantic search uses brute-force cosine similarity over stored float32
+embeddings, which is more than fast enough for a personal note collection
+(thousands of notes) and avoids native vector-extension build headaches on ARM.
+"""
+
+from __future__ import annotations
+
+import sqlite3
+import struct
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Iterable, Sequence
+
+try:
+ import numpy as np
+except ImportError: # numpy only needed when embeddings are enabled
+ np = None # type: ignore
+
+
# Full schema, safe to run on every start-up: every statement is
# CREATE ... IF NOT EXISTS. Columns added after the first release are also
# handled by Database._migrate for databases created by older builds.
SCHEMA = """
CREATE TABLE IF NOT EXISTS notes (
    id INTEGER PRIMARY KEY,
    content TEXT NOT NULL,
    source TEXT, -- 'voice' | 'text' | 'reminder-fallback'
    created_at TEXT NOT NULL, -- ISO-8601 UTC
    embedding BLOB, -- packed float32, or NULL
    tags TEXT NOT NULL DEFAULT '', -- space-separated topic tags
    context TEXT NOT NULL DEFAULT 'general', -- general | burgos | madrid | …
    deleted INTEGER NOT NULL DEFAULT 0 -- soft delete (recoverable via undo)
);

CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
    content,
    content='notes',
    content_rowid='id',
    tokenize="unicode61 remove_diacritics 2"
);

-- Keep the FTS index in sync with the notes table.
CREATE TRIGGER IF NOT EXISTS notes_ai AFTER INSERT ON notes BEGIN
    INSERT INTO notes_fts(rowid, content) VALUES (new.id, new.content);
END;
CREATE TRIGGER IF NOT EXISTS notes_ad AFTER DELETE ON notes BEGIN
    INSERT INTO notes_fts(notes_fts, rowid, content) VALUES('delete', old.id, old.content);
END;
CREATE TRIGGER IF NOT EXISTS notes_au AFTER UPDATE ON notes BEGIN
    INSERT INTO notes_fts(notes_fts, rowid, content) VALUES('delete', old.id, old.content);
    INSERT INTO notes_fts(rowid, content) VALUES (new.id, new.content);
END;

CREATE TABLE IF NOT EXISTS reminders (
    id INTEGER PRIMARY KEY,
    text TEXT NOT NULL,
    due_at TEXT NOT NULL, -- ISO-8601 UTC
    created_at TEXT NOT NULL,
    done INTEGER NOT NULL DEFAULT 0,
    notified INTEGER NOT NULL DEFAULT 0,
    context TEXT NOT NULL DEFAULT 'general',
    deleted INTEGER NOT NULL DEFAULT 0 -- soft delete (recoverable via undo)
);
CREATE INDEX IF NOT EXISTS idx_reminders_due ON reminders(due_at, done, notified);

-- Operation log powering "undo". Each mutating action appends a row; undo walks
-- back the most recent not-yet-undone op and reverses it.
CREATE TABLE IF NOT EXISTS ops (
    id INTEGER PRIMARY KEY,
    kind TEXT NOT NULL, -- create_note | create_reminder | delete_note |
                        -- delete_reminder | done_reminder | edit_note |
                        -- edit_reminder | move_note | move_reminder
    target_id INTEGER NOT NULL,
    summary TEXT NOT NULL DEFAULT '', -- human description for the confirmation
    undo_data TEXT NOT NULL DEFAULT '', -- JSON snapshot of prior state (edits and moves)
    created_at TEXT NOT NULL,
    undone INTEGER NOT NULL DEFAULT 0
);
"""
+
+
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(timezone.utc)
    return utc_now.isoformat()
+
+
def _pack(vec: Sequence[float]) -> bytes:
    """Serialise *vec* as consecutive little-endian float32 values."""
    layout = "<%df" % len(vec)
    return struct.pack(layout, *vec)
+
+
def _unpack(blob: bytes) -> "np.ndarray":
    """Decode a blob of little-endian float32 values into a NumPy array.

    Only whole 4-byte groups are read; trailing bytes are ignored. Requires
    numpy.
    """
    float_count = len(blob) // 4
    return np.frombuffer(blob, dtype="<f4", count=float_count)
+
+
@dataclass
class Note:
    """One row of the ``notes`` table, plus an optional search score."""

    id: int
    content: str
    source: str
    created_at: str  # ISO-8601 UTC, as stored
    tags: str = ""  # space-separated topic tags
    context: str = "general"
    # Set only by the search methods: negated bm25 rank (FTS) or cosine
    # similarity (semantic); higher is better in both cases.
    score: float | None = None
+
+
@dataclass
class Reminder:
    """One row of the ``reminders`` table."""

    id: int
    text: str
    due_at: str  # ISO UTC
    created_at: str
    done: bool
    notified: bool  # stored as 0/1; set by mark_notified, cleared on reschedule
    context: str = "general"
+
+
@dataclass
class Op:
    """One entry of the ``ops`` undo log."""

    id: int
    kind: str  # e.g. create_note, delete_reminder, edit_note, move_note
    target_id: int  # id of the note or reminder the op acted on
    summary: str  # human description used in the undo confirmation
    undo_data: str = ""  # JSON snapshot of the prior state, or "" when none
+
+
class Database:
    """Single-file SQLite store for notes, reminders and the undo log.

    Every mutating method commits immediately. Notes and reminders are
    soft-deleted (``deleted=1``) so undo can bring them back; read methods
    skip soft-deleted rows.
    """

    def __init__(self, path: str):
        """Open (or create) the database at *path* and bring its schema up to date."""
        self.path = path
        self.conn = sqlite3.connect(path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self.conn.execute("PRAGMA journal_mode=WAL;")
        self.conn.execute("PRAGMA foreign_keys=ON;")
        self.conn.executescript(SCHEMA)
        self._migrate()
        self.conn.commit()

    def _migrate(self) -> None:
        """Lightweight, additive migrations for pre-existing databases."""
        note_cols = {r["name"] for r in self.conn.execute("PRAGMA table_info(notes)")}
        if "tags" not in note_cols:
            self.conn.execute("ALTER TABLE notes ADD COLUMN tags TEXT NOT NULL DEFAULT ''")
        if "deleted" not in note_cols:
            self.conn.execute("ALTER TABLE notes ADD COLUMN deleted INTEGER NOT NULL DEFAULT 0")
        if "context" not in note_cols:
            self.conn.execute("ALTER TABLE notes ADD COLUMN context TEXT NOT NULL DEFAULT 'general'")
        rem_cols = {r["name"] for r in self.conn.execute("PRAGMA table_info(reminders)")}
        if "deleted" not in rem_cols:
            self.conn.execute("ALTER TABLE reminders ADD COLUMN deleted INTEGER NOT NULL DEFAULT 0")
        if "context" not in rem_cols:
            self.conn.execute("ALTER TABLE reminders ADD COLUMN context TEXT NOT NULL DEFAULT 'general'")
        # ops table may predate undo_data (only if an old build created it).
        op_tbl = self.conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='ops'").fetchone()
        if op_tbl:
            op_cols = {r["name"] for r in self.conn.execute("PRAGMA table_info(ops)")}
            if "undo_data" not in op_cols:
                self.conn.execute("ALTER TABLE ops ADD COLUMN undo_data TEXT NOT NULL DEFAULT ''")

    def close(self) -> None:
        """Close the underlying connection."""
        self.conn.close()

    @staticmethod
    def _context_filter(context: str | None, column: str = "context") -> tuple[str, list]:
        """Return the optional ``AND <column>=? `` SQL fragment and its parameters.

        A falsy *context* means "no filter": an empty fragment and no params.
        """
        if context:
            return f"AND {column}=? ", [context]
        return "", []

    # ---------------- notes ----------------

    def add_note(self, content: str, source: str = "text",
                 embedding: Sequence[float] | None = None, tags: str = "",
                 context: str = "general") -> int:
        """Insert a note and return its id. A missing/empty embedding is stored as NULL."""
        blob = _pack(embedding) if embedding else None
        cur = self.conn.execute(
            "INSERT INTO notes(content, source, created_at, embedding, tags, context) "
            "VALUES (?,?,?,?,?,?)",
            (content, source, _now_iso(), blob, tags, context),
        )
        self.conn.commit()
        return int(cur.lastrowid)

    def get_note(self, note_id: int) -> Note | None:
        """Return the note with *note_id*, or None if absent or soft-deleted."""
        r = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context FROM notes "
            "WHERE id=? AND deleted=0", (note_id,)).fetchone()
        return _row_to_note(r) if r else None

    def update_note(self, note_id: int, content: str,
                    embedding: Sequence[float] | None, tags: str) -> bool:
        """Replace a note's content, embedding and tags. True if a row changed."""
        blob = _pack(embedding) if embedding else None
        cur = self.conn.execute(
            "UPDATE notes SET content=?, embedding=?, tags=? WHERE id=? AND deleted=0",
            (content, blob, tags, note_id),
        )
        self.conn.commit()
        return cur.rowcount > 0

    def set_note_context(self, note_id: int, context: str) -> bool:
        """Re-file a note under *context*. True if a row changed."""
        cur = self.conn.execute(
            "UPDATE notes SET context=? WHERE id=? AND deleted=0", (context, note_id))
        self.conn.commit()
        return cur.rowcount > 0

    def recent_notes(self, limit: int = 10, context: str | None = None) -> list[Note]:
        """Return up to *limit* notes, newest first, optionally from one context."""
        clause, params = self._context_filter(context)
        rows = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context FROM notes "
            f"WHERE deleted=0 {clause}ORDER BY created_at DESC LIMIT ?",
            params + [limit],
        ).fetchall()
        return [_row_to_note(r) for r in rows]

    def all_notes(self) -> list[Note]:
        """Return every live note, oldest first."""
        rows = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context FROM notes "
            "WHERE deleted=0 ORDER BY created_at"
        ).fetchall()
        return [_row_to_note(r) for r in rows]

    def notes_in_range(self, start_utc: datetime, end_utc: datetime,
                       limit: int = 100, context: str | None = None) -> list[Note]:
        """Return notes created in ``[start_utc, end_utc)``, newest first.

        Both bounds are converted to UTC ISO strings and compared as text with
        the stored ``created_at`` values.
        """
        clause, ctx_params = self._context_filter(context)
        params = [start_utc.astimezone(timezone.utc).isoformat(),
                  end_utc.astimezone(timezone.utc).isoformat()]
        rows = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context FROM notes "
            f"WHERE deleted=0 AND created_at >= ? AND created_at < ? {clause}"
            "ORDER BY created_at DESC LIMIT ?",
            params + ctx_params + [limit],
        ).fetchall()
        return [_row_to_note(r) for r in rows]

    def notes_by_tag(self, tag: str, limit: int = 50) -> list[Note]:
        """Return up to *limit* notes carrying *tag* (case-insensitive), newest first."""
        # tags are stored space-separated; match a whole token. LIKE wildcards
        # in the tag are escaped so "%" or "_" only match themselves.
        needle = tag.lower()
        for special in ("\\", "%", "_"):
            needle = needle.replace(special, "\\" + special)
        # `context` is selected too so listings show each note's real context
        # rather than the "general" default.
        rows = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context FROM notes "
            "WHERE deleted=0 AND (' ' || lower(tags) || ' ') LIKE ? ESCAPE '\\' "
            "ORDER BY created_at DESC LIMIT ?",
            (f"% {needle} %", limit),
        ).fetchall()
        return [_row_to_note(r) for r in rows]

    def all_tags(self) -> list[tuple[str, int]]:
        """Return ``(tag, count)`` pairs, most used first, ties in alphabetical order."""
        counts: dict[str, int] = {}
        for r in self.conn.execute("SELECT tags FROM notes WHERE deleted=0 AND tags != ''"):
            for t in r["tags"].split():
                counts[t] = counts.get(t, 0) + 1
        return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))

    # ---- soft delete / restore ----

    def soft_delete_note(self, note_id: int) -> bool:
        """Flag a live note as deleted. False if it was missing or already deleted."""
        cur = self.conn.execute(
            "UPDATE notes SET deleted=1 WHERE id=? AND deleted=0", (note_id,))
        self.conn.commit()
        return cur.rowcount > 0

    def restore_note(self, note_id: int) -> bool:
        """Clear a note's deleted flag. True if the row exists."""
        cur = self.conn.execute("UPDATE notes SET deleted=0 WHERE id=?", (note_id,))
        self.conn.commit()
        return cur.rowcount > 0

    def search_fts(self, query: str, limit: int = 5,
                   context: str | None = None) -> list[Note]:
        """Full-text keyword search. Returns the best matches first."""
        # bm25() is lower-is-better; expose it as a higher-is-better score.
        match = _fts_query(query)
        if not match:
            return []
        clause, ctx_params = self._context_filter(context, column="n.context")
        rows = self.conn.execute(
            "SELECT n.id, n.content, n.source, n.created_at, n.tags, n.context, "
            "bm25(notes_fts) AS rank "
            "FROM notes_fts JOIN notes n ON n.id = notes_fts.rowid "
            f"WHERE notes_fts MATCH ? AND n.deleted=0 {clause}ORDER BY rank LIMIT ?",
            [match] + ctx_params + [limit],
        ).fetchall()
        return [_row_to_note(r, score=-r["rank"]) for r in rows]

    def search_semantic(self, query_vec: Sequence[float], limit: int = 5,
                        context: str | None = None) -> list[Note]:
        """Brute-force cosine similarity over stored embeddings.

        Returns an empty list when numpy is unavailable. Stored vectors whose
        length differs from the query's are skipped, since they cannot be
        compared with it.
        """
        if np is None:
            return []
        clause, params = self._context_filter(context)
        rows = self.conn.execute(
            "SELECT id, content, source, created_at, tags, context, embedding FROM notes "
            f"WHERE embedding IS NOT NULL AND deleted=0 {clause}",
            params,
        ).fetchall()
        if not rows:
            return []
        q = np.asarray(query_vec, dtype="<f4")
        qn = np.linalg.norm(q) or 1.0
        scored: list[Note] = []
        for r in rows:
            v = _unpack(r["embedding"])
            if v.shape != q.shape:
                continue
            # `or 1.0` keeps an all-zero vector from dividing by zero.
            denom = (np.linalg.norm(v) * qn) or 1.0
            sim = float(np.dot(v, q) / denom)
            scored.append(_row_to_note(r, score=sim))
        scored.sort(key=lambda n: n.score or 0.0, reverse=True)
        return scored[:limit]

    def search_hybrid(self, query: str, query_vec: Sequence[float] | None,
                      limit: int = 5, context: str | None = None) -> list[Note]:
        """Merge keyword and semantic results, de-duplicated by note id."""
        results: dict[int, Note] = {}
        for n in self.search_fts(query, limit=limit, context=context):
            results[n.id] = n
        if query_vec is not None:
            for n in self.search_semantic(query_vec, limit=limit, context=context):
                results.setdefault(n.id, n)
        out = list(results.values())
        # Notes that matched both signals naturally appear once; keep FTS order
        # first then fill with semantic. Cap to limit.
        return out[:limit]

    # ---------------- reminders ----------------

    def add_reminder(self, text: str, due_at_utc: datetime,
                     context: str = "general") -> int:
        """Insert a reminder (due time stored as UTC ISO text) and return its id."""
        cur = self.conn.execute(
            "INSERT INTO reminders(text, due_at, created_at, context) VALUES (?,?,?,?)",
            (text, due_at_utc.astimezone(timezone.utc).isoformat(), _now_iso(), context),
        )
        self.conn.commit()
        return int(cur.lastrowid)

    def due_reminders(self, now_utc: datetime) -> list[Reminder]:
        """Return live, not-done, not-yet-notified reminders due at or before *now_utc*."""
        rows = self.conn.execute(
            "SELECT * FROM reminders WHERE done=0 AND deleted=0 AND notified=0 "
            "AND due_at<=? ORDER BY due_at",
            (now_utc.astimezone(timezone.utc).isoformat(),),
        ).fetchall()
        return [_row_to_reminder(r) for r in rows]

    def pending_reminders(self, context: str | None = None) -> list[Reminder]:
        """Return live, not-done reminders ordered by due time, optionally from one context."""
        clause, params = self._context_filter(context)
        rows = self.conn.execute(
            f"SELECT * FROM reminders WHERE done=0 AND deleted=0 {clause}ORDER BY due_at",
            params,
        ).fetchall()
        return [_row_to_reminder(r) for r in rows]

    def get_reminder(self, reminder_id: int) -> Reminder | None:
        """Return the reminder with *reminder_id*, or None if absent or soft-deleted."""
        r = self.conn.execute(
            "SELECT * FROM reminders WHERE id=? AND deleted=0", (reminder_id,)).fetchone()
        return _row_to_reminder(r) if r else None

    def mark_notified(self, reminder_id: int) -> None:
        """Set the ``notified`` flag of a reminder."""
        self.conn.execute("UPDATE reminders SET notified=1 WHERE id=?", (reminder_id,))
        self.conn.commit()

    def set_done(self, reminder_id: int, done: bool) -> bool:
        """Set or clear a live reminder's ``done`` flag. True if a row matched."""
        cur = self.conn.execute(
            "UPDATE reminders SET done=? WHERE id=? AND deleted=0",
            (1 if done else 0, reminder_id),
        )
        self.conn.commit()
        return cur.rowcount > 0

    def mark_done(self, reminder_id: int) -> bool:
        """Shorthand for ``set_done(reminder_id, True)``."""
        return self.set_done(reminder_id, True)

    def update_reminder(self, reminder_id: int, text: str | None = None,
                        due_at_utc: datetime | None = None,
                        clear_notified: bool = False) -> bool:
        """Update only the given fields of a live reminder.

        Returns False when nothing was requested or no row matched.
        """
        sets, params = [], []
        if text is not None:
            sets.append("text=?")
            params.append(text)
        if due_at_utc is not None:
            sets.append("due_at=?")
            params.append(due_at_utc.astimezone(timezone.utc).isoformat())
        if clear_notified:
            sets.append("notified=0")
        if not sets:
            return False
        params.append(reminder_id)
        # Only fixed column fragments are interpolated; values stay parameters.
        cur = self.conn.execute(
            f"UPDATE reminders SET {', '.join(sets)} WHERE id=? AND deleted=0", params)
        self.conn.commit()
        return cur.rowcount > 0

    def set_reminder_context(self, reminder_id: int, context: str) -> bool:
        """Re-file a reminder under *context*. True if a row changed."""
        cur = self.conn.execute(
            "UPDATE reminders SET context=? WHERE id=? AND deleted=0",
            (context, reminder_id))
        self.conn.commit()
        return cur.rowcount > 0

    def soft_delete_reminder(self, reminder_id: int) -> bool:
        """Flag a live reminder as deleted. False if it was missing or already deleted."""
        cur = self.conn.execute(
            "UPDATE reminders SET deleted=1 WHERE id=? AND deleted=0", (reminder_id,))
        self.conn.commit()
        return cur.rowcount > 0

    def restore_reminder(self, reminder_id: int) -> bool:
        """Clear a reminder's deleted flag. True if the row exists."""
        cur = self.conn.execute(
            "UPDATE reminders SET deleted=0 WHERE id=?", (reminder_id,))
        self.conn.commit()
        return cur.rowcount > 0

    # ---------------- undo / operation log ----------------

    def log_op(self, kind: str, target_id: int, summary: str = "",
               undo_data: str = "") -> int:
        """Append an entry to the undo log and return its id."""
        cur = self.conn.execute(
            "INSERT INTO ops(kind, target_id, summary, undo_data, created_at) "
            "VALUES (?,?,?,?,?)",
            (kind, target_id, summary, undo_data, _now_iso()),
        )
        self.conn.commit()
        return int(cur.lastrowid)

    def last_op(self) -> Op | None:
        """Return the newest op not yet undone, or None when there is none."""
        r = self.conn.execute(
            "SELECT id, kind, target_id, summary, undo_data FROM ops WHERE undone=0 "
            "ORDER BY id DESC LIMIT 1"
        ).fetchone()
        if r is None:
            return None
        return Op(r["id"], r["kind"], r["target_id"], r["summary"], r["undo_data"])

    def mark_op_undone(self, op_id: int) -> None:
        """Flag an op as undone so ``last_op`` no longer returns it."""
        self.conn.execute("UPDATE ops SET undone=1 WHERE id=?", (op_id,))
        self.conn.commit()
+
def _row_to_note(r: sqlite3.Row, score: float | None = None) -> Note:
    """Build a Note from a result row.

    ``tags`` and ``context`` fall back to their defaults when the query did
    not select those columns.
    """
    selected = r.keys()
    tags = r["tags"] if "tags" in selected else ""
    context = r["context"] if "context" in selected else "general"
    return Note(
        id=r["id"],
        content=r["content"],
        source=r["source"],
        created_at=r["created_at"],
        tags=tags,
        context=context,
        score=score,
    )
+
+
def _row_to_reminder(r: sqlite3.Row) -> Reminder:
    """Build a Reminder from a result row.

    The 0/1 ``done`` and ``notified`` columns become booleans; ``context``
    falls back to "general" when the row lacks that column.
    """
    context = r["context"] if "context" in r.keys() else "general"
    is_done = bool(r["done"])
    was_notified = bool(r["notified"])
    return Reminder(
        id=r["id"],
        text=r["text"],
        due_at=r["due_at"],
        created_at=r["created_at"],
        done=is_done,
        notified=was_notified,
        context=context,
    )
+
+
def _fts_query(text: str) -> str:
    """Turn free text into a safe FTS5 MATCH expression (OR of terms).

    Words of a single character are dropped. Every remaining word is wrapped
    in double quotes so FTS5 treats it as a plain string, not as query syntax.
    Returns "" when no usable word is left.
    """
    import re

    quoted = [
        f'"{word}"'
        for word in re.findall(r"\w+", text, flags=re.UNICODE)
        if len(word) > 1
    ]
    return " OR ".join(quoted)
--- /dev/null
+"""HTTP morning-briefing endpoint, served with aiohttp.
+
+Designed to be hit by an iPhone Shortcuts automation over Tailscale, e.g.:
+ GET http://rpi5:8081/morning
+ Header: Authorization: Bearer <MORNING_TOKEN>
+
+On success it returns the briefing as JSON *and* pushes it to Telegram, so the
+phone automation can either show the JSON or just trigger the Telegram message.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Awaitable, Callable
+
+from aiohttp import web
+
+from .core import Butler
+
+log = logging.getLogger("segismundo.http")
+
+SendFn = Callable[[str], Awaitable[None]]
+
+
def build_app(butler: Butler, send: SendFn, token: str | None) -> web.Application:
    """Build the aiohttp application with /health, /morning and /context/{name}.

    Args:
        butler: source of the briefing text/data and of the known contexts.
        send: coroutine function used to push the briefing text (to Telegram).
        token: shared secret; when empty/None the protected routes are open.

    Returns:
        The configured ``web.Application`` (not yet started).
    """
    import hmac  # only needed here; kept local so the module imports stay as-is

    app = web.Application()

    def _same_secret(candidate: str | None, expected: str) -> bool:
        # Constant-time comparison, so response timing does not reveal how
        # much of a guessed token was correct. Header values may carry
        # surrogate-escaped bytes, hence the explicit error handler.
        if candidate is None:
            return False
        return hmac.compare_digest(
            candidate.encode("utf-8", "surrogateescape"),
            expected.encode("utf-8", "surrogateescape"),
        )

    def _authorized(request: web.Request) -> bool:
        if not token:
            return True  # no token configured -> open (rely on Tailscale)
        if _same_secret(request.headers.get("Authorization", ""), f"Bearer {token}"):
            return True
        # Query-string fallback for clients that cannot set headers.
        return _same_secret(request.query.get("token"), token)

    def _unauthorized() -> web.Response:
        return web.json_response({"error": "unauthorized"}, status=401)

    async def _push_and_reply(request: web.Request, text: str, data: dict,
                              what: str) -> web.Response:
        # Push unless the caller opted out with ?push=0|false|no. A failed
        # push is logged but never fails the HTTP response.
        push = request.query.get("push", "1") not in ("0", "false", "no")
        if push:
            try:
                await send(text)
            except Exception:
                log.exception("Failed to push %s to Telegram", what)
        return web.json_response({"text": text, **data})

    async def health(_request: web.Request) -> web.Response:
        # Unauthenticated liveness probe.
        return web.json_response({"status": "ok", "service": "segismundo",
                                  "contexts": butler.contexts})

    async def morning(request: web.Request) -> web.Response:
        if not _authorized(request):
            return _unauthorized()
        text, data = butler.morning_summary_data()
        return await _push_and_reply(request, text, data, "morning summary")

    async def context_view(request: web.Request) -> web.Response:
        if not _authorized(request):
            return _unauthorized()
        name = request.match_info["name"].lower()
        if name not in butler.contexts and name != "general":
            return web.json_response(
                {"error": "unknown context", "known": butler.contexts + ["general"]},
                status=404,
            )
        text, data = butler.context_briefing(name)
        return await _push_and_reply(request, text, data, "context briefing")

    app.router.add_get("/health", health)
    app.router.add_get("/morning", morning)
    app.router.add_get("/context/{name}", context_view)
    return app
+
+
async def start_http(butler: Butler, send: SendFn, host: str, port: int,
                     token: str | None) -> web.AppRunner:
    """Build the app, bind it to ``host:port`` and start serving.

    Returns the ``AppRunner`` that owns the started site.
    """
    runner = web.AppRunner(build_app(butler, send, token))
    await runner.setup()
    await web.TCPSite(runner, host, port).start()
    log.info("HTTP endpoints on http://%s:%s (/morning, /context/<name>)", host, port)
    return runner
--- /dev/null
+"""LLM interaction via ollama: intent classification, Q&A over notes, embeddings.
+
+All user-facing text is Spanish. Intent classification uses ollama's structured
+output (a JSON schema passed as `format`) so we get a reliable, parseable object.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from datetime import datetime
+
+from ollama import AsyncClient
+
+# Small local LLMs sometimes read a past-tense copula ("¿cuál ERA la
+# contraseña?") as "something that no longer holds" and refuse to answer, even
+# though the same question in present tense works. Soften past ser/estar to
+# present in the *question we ask the model* (retrieval is unaffected).
+_PAST_COPULA = {
+ "era": "es", "eran": "son", "fue": "es", "fueron": "son",
+ "estaba": "está", "estaban": "están", "estuvo": "está", "estuvieron": "están",
+}
+_COPULA_RE = re.compile(r"\b(" + "|".join(_PAST_COPULA) + r")\b", re.IGNORECASE)
+
+
+def _soften_tense(question: str) -> str:
+ return _COPULA_RE.sub(lambda m: _PAST_COPULA[m.group(1).lower()], question)
+
+# Schema for the intent classifier. ollama enforces this via constrained
+# decoding, so the model is forced to emit a matching JSON object.
+INTENT_SCHEMA = {
+ "type": "object",
+ "properties": {
+ "action": {
+ "type": "string",
+ "enum": [
+ "note", # store something to remember
+ "reminder", # schedule a reminder for a time
+ "question", # ask about stored notes
+ "list_reminders", # show pending reminders
+ "list_notes", # show recent notes
+ "summary", # morning-style briefing
+ "chitchat", # anything else / small talk
+ ],
+ },
+ "note_text": {"type": "string"},
+ "reminder_text": {"type": "string"},
+ "when": {"type": "string"}, # natural-language time phrase (Spanish)
+ "query": {"type": "string"}, # search query for a question
+ "reply": {"type": "string"}, # direct reply for chitchat
+ },
+ "required": ["action"],
+}
+
+_CLASSIFY_SYSTEM = """\
+Eres Segismundo, el asistente personal de tu dueño. Recibes un mensaje (texto o \
+una nota de voz transcrita) y debes clasificar su intención y extraer los datos \
+relevantes. Responde SIEMPRE en español.
+
+La fecha y hora actuales son: {now}. Zona horaria: {tz}.
+
+Clasifica la intención en uno de estos valores de "action":
+- "note": el usuario quiere guardar una nota, idea o información para recordarla \
+luego. Pon el contenido limpio de la nota en "note_text".
+- "reminder": el usuario quiere que le recuerdes algo en un momento concreto. \
+Pon QUÉ recordar en "reminder_text" y la expresión temporal tal cual (p.ej. \
+"mañana a las 9", "en 2 horas", "el viernes") en "when".
+- "question": el usuario pregunta algo sobre sus notas guardadas. Pon una buena \
+consulta de búsqueda en "query".
+- "list_reminders": el usuario quiere ver sus recordatorios pendientes.
+- "list_notes": el usuario quiere ver sus notas recientes.
+- "summary": el usuario pide un resumen de lo pendiente.
+- "chitchat": saludo, charla o cualquier otra cosa. Pon tu respuesta breve en "reply".
+
+Reglas:
+- Si dudas entre "note" y "reminder": si hay una referencia temporal clara para \
+avisar, es "reminder"; si no, es "note".
+- Si el mensaje es una reflexión larga o un volcado de ideas (pensar en voz \
+alta), es "note" aunque mencione cosas por hacer; conserva la idea completa.
+- No inventes contenido. Extrae solo lo que dijo el usuario.
+- En "reminder_text" pon SOLO la tarea, sin la fecha ni verbos como "recuérdame".
+- En "note_text" pon SOLO el contenido, sin verbos como "apunta" o "guarda".
+
+Ejemplos:
+- "recuérdame mañana a las 9 llamar al fontanero" -> \
+{{"action":"reminder","reminder_text":"llamar al fontanero","when":"mañana a las 9"}}
+- "apunta que la wifi del hotel es Familia2024" -> \
+{{"action":"note","note_text":"la wifi del hotel es Familia2024"}}
+- "¿cuándo caduca mi pasaporte?" -> \
+{{"action":"question","query":"caducidad del pasaporte"}}
+- "¿estás operativo?" -> {{"action":"chitchat","reply":"Sí, aquí estoy."}}
+"""
+
+_ANSWER_SYSTEM = """\
+Eres Segismundo, el asistente personal de tu dueño. Responde su pregunta \
+basándote en las notas que te paso como contexto. Responde en español y sé \
+conciso.
+
+Razona de forma sensata sobre las notas aunque la pregunta use palabras \
+distintas a las de la nota. Por ejemplo, si una nota dice «la wifi del hotel es \
+Familia2024» y preguntan por «la contraseña/clave del wifi», la respuesta es \
+«Familia2024». Trata los datos de la forma natural en que se usan.
+
+El tiempo verbal de la pregunta da igual: «¿cuál era…?», «¿cuál fue…?» y \
+«¿cuál es…?» piden lo mismo; no asumas que algo ya no es válido por estar en \
+pasado.
+
+Solo responde que no tienes información ("No tengo ninguna nota sobre eso") si \
+de verdad NINGUNA nota está relacionada con la pregunta. No inventes datos que \
+no aparezcan ni se deduzcan de las notas.
+"""
+
+_CHAT_SYSTEM = """\
+Eres Segismundo, el mayordomo personal y asistente de tu dueño, que vive en una \
+Raspberry Pi y le ayuda por Telegram. Mantén una conversación natural, cercana y \
+breve, en español. Puedes responder preguntas generales y sencillas con lo que \
+sabes. Si te preguntan si estás funcionando o operativo, confirma con naturalidad \
+que sí, que estás a su servicio. No te inventes notas ni recordatorios del usuario: \
+para eso él te los pide explícitamente.
+"""
+
+
@dataclass
class Intent:
    """Classifier result: the chosen action plus its optional text fields.

    Only ``action`` is required; every other field defaults to "".
    """

    # One of the values listed in the classifier schema's "action" enum.
    action: str
    # Content to store when the action is "note".
    note_text: str = ""
    # What to remind about when the action is "reminder".
    reminder_text: str = ""
    # Natural-language time phrase for a reminder.
    when: str = ""
    # Search query when the action is "question".
    query: str = ""
    # Direct reply when the action is "chitchat".
    reply: str = ""
+
+
class LLM:
    """Async wrapper around an ollama client: intent classification, Q&A,
    small talk, briefings, tag suggestion and embeddings."""

    def __init__(self, host: str, model: str, embed_model: str,
                 use_embeddings: bool, tz):
        self.client = AsyncClient(host=host)
        self.model = model
        self.embed_model = embed_model
        self.use_embeddings = use_embeddings
        self.tz = tz

    async def _chat_text(self, system: str, user: str, temperature: float) -> str:
        """Run one system+user exchange and return the stripped reply text."""
        resp = await self.client.chat(
            model=self.model,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            options={"temperature": temperature},
        )
        return resp["message"]["content"].strip()

    async def classify(self, text: str) -> Intent:
        """Classify *text* into an Intent using schema-constrained output.

        Falls back to a "chitchat" intent asking the user to repeat when the
        model's reply is not a JSON object. Missing, null or non-string
        fields become "" (and a missing action becomes "chitchat") instead
        of raising.
        """
        now = datetime.now(self.tz).strftime("%A %d/%m/%Y %H:%M")
        system = _CLASSIFY_SYSTEM.format(now=now, tz=str(self.tz))
        resp = await self.client.chat(
            model=self.model,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": text},
            ],
            format=INTENT_SCHEMA,
            options={"temperature": 0},
        )
        not_understood = Intent(
            action="chitchat",
            reply="Perdona, no te he entendido. ¿Puedes repetirlo?",
        )
        try:
            data = json.loads(resp["message"]["content"])
        except (json.JSONDecodeError, TypeError):
            return not_understood
        if not isinstance(data, dict):
            return not_understood

        def text_field(key: str) -> str:
            # The schema only requires "action"; tolerate null/non-string values.
            value = data.get(key)
            return value.strip() if isinstance(value, str) else ""

        return Intent(
            action=text_field("action") or "chitchat",
            note_text=text_field("note_text"),
            reminder_text=text_field("reminder_text"),
            when=text_field("when"),
            query=text_field("query"),
            reply=text_field("reply"),
        )

    async def answer(self, question: str, notes: list[str]) -> str:
        """Answer *question* from *notes*; with no notes, ask the model to say so."""
        question = _soften_tense(question)
        if notes:
            context = "\n".join(f"- {n}" for n in notes)
            user = f"NOTAS:\n{context}\n\nPREGUNTA: {question}"
        else:
            user = (
                f"No hay notas relevantes guardadas.\n\nPREGUNTA: {question}\n"
                "Responde que no tienes notas sobre eso."
            )
        return await self._chat_text(_ANSWER_SYSTEM, user, 0.2)

    async def chat(self, text: str) -> str:
        """Free-form conversational reply for small talk and simple questions."""
        return await self._chat_text(_CHAT_SYSTEM, text, 0.5)

    async def briefing(self, prompt: str) -> str:
        """Turn the data in *prompt* into a short morning summary."""
        system = (
            "Eres Segismundo. Redacta un resumen matutino breve, claro y "
            "motivador en español a partir de los datos que te paso."
        )
        return await self._chat_text(system, prompt, 0.3)

    async def suggest_tags(self, content: str) -> list[str]:
        """Best-effort 1-3 short topic tags for a note (for browsing by theme).

        Never raises: any client/parsing failure, or a reply whose "tags"
        value is not a list, yields []. Tags are lower-cased, slugified with
        "-" and de-duplicated, keeping at most the first three.
        """
        schema = {
            "type": "object",
            "properties": {
                "tags": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["tags"],
        }
        system = (
            "Asigna de 1 a 3 etiquetas temáticas muy cortas (una palabra cada "
            "una, en minúscula y en español, sin tildes si puedes) que clasifiquen "
            "la nota para encontrarla luego por tema. Ejemplos de etiquetas: "
            "jardin, trabajo, compras, salud, viaje, ideas."
        )
        try:
            resp = await self.client.chat(
                model=self.model,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": content},
                ],
                format=schema,
                options={"temperature": 0},
            )
            tags = json.loads(resp["message"]["content"]).get("tags", [])
        except Exception:
            return []
        if not isinstance(tags, list):
            # null would raise below; a bare string would be split into characters.
            return []
        clean: list[str] = []
        for t in tags:
            slug = re.sub(r"[^\w]+", "-", str(t).strip().lower()).strip("-")
            if slug and slug not in clean:
                clean.append(slug)
        return clean[:3]

    async def embed(self, text: str) -> list[float] | None:
        """Return the embedding of *text*, or None when embeddings are disabled."""
        if not self.use_embeddings:
            return None
        resp = await self.client.embeddings(model=self.embed_model, prompt=text)
        return list(resp["embedding"])
--- /dev/null
+"""Background scheduler that fires due reminders over Telegram."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from datetime import datetime, timezone
+from typing import Awaitable, Callable
+
+from .db import Database, Reminder
+
+log = logging.getLogger("segismundo.reminders")
+
+# Called once per due reminder; responsible for delivering it (with buttons).
+NotifyFn = Callable[[Reminder], Awaitable[None]]
+
+
async def scheduler_loop(db: Database, notify: NotifyFn, tz,
                         poll_seconds: float = 30) -> None:
    """Poll for due reminders forever and deliver each one through *notify*.

    Every cycle asks ``db.due_reminders`` for reminders due at the current
    UTC time; each one is delivered and then marked notified. A failure on
    one reminder is logged and does not stop the others; a failure of the
    whole cycle is logged and the loop keeps running. ``tz`` is accepted but
    not used here.
    """
    async def fire(reminder: Reminder) -> None:
        # Only mark as notified after delivery succeeded.
        try:
            await notify(reminder)
            db.mark_notified(reminder.id)
            log.info("Fired reminder #%s", reminder.id)
        except Exception:
            log.exception("Failed to send reminder #%s; will retry", reminder.id)

    log.info("Reminder scheduler started (poll every %ss)", poll_seconds)
    while True:
        try:
            for due in db.due_reminders(datetime.now(timezone.utc)):
                await fire(due)
        except Exception:
            log.exception("Scheduler iteration failed")
        await asyncio.sleep(poll_seconds)
--- /dev/null
+"""Parse Spanish natural-language time phrases into a concrete datetime.
+
+Uses `dateparser` with Spanish settings and a "prefer the future" bias so that
+"a las 9" means the next 9 o'clock, not one in the past.
+
+dateparser handles *dates* ("mañana", "el viernes", "en 2 horas") well but is
+weak on Spanish *colloquial clock times* — especially spoken ones, which arrive
+spelled out from Whisper ("a la una", "a las nueve y media", "a las cinco menos
+cuarto de la tarde"). We normalise those to an explicit "HH:MM" token first.
+"""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime, timedelta
+
+import dateparser
+
+_BARE_TIME_RE = re.compile(r"\b(\d{1,2}):(\d{2})\b")
+
+# Spanish number words we need for clock times (hours 0-24 and common minutes).
+_NUM = {
+ "cero": 0, "una": 1, "uno": 1, "un": 1, "dos": 2, "tres": 3, "cuatro": 4,
+ "cinco": 5, "seis": 6, "siete": 7, "ocho": 8, "nueve": 9, "diez": 10,
+ "once": 11, "doce": 12, "trece": 13, "catorce": 14, "quince": 15,
+ "dieciseis": 16, "dieciséis": 16, "diecisiete": 17, "dieciocho": 18,
+ "diecinueve": 19, "veinte": 20, "veintiuna": 21, "veintiuno": 21,
+ "veintidos": 22, "veintidós": 22, "veintitres": 23, "veintitrés": 23,
+ "veinticuatro": 24, "veinticinco": 25, "treinta": 30, "media": 30,
+ "cuarto": 15,
+}
+# Alternation of number words, longest first so "veinticinco" beats "veinti…".
+_NUM_WORDS = "|".join(sorted(_NUM, key=len, reverse=True))
+# An hour or minute token: digits or a Spanish number word.
+_VAL = rf"\d{{1,2}}|{_NUM_WORDS}"
+
+# Matches a colloquial clock time, optionally introduced by "a la(s)".
+# Examples: "a la una", "a las 9", "las nueve y media", "5 menos cuarto de la tarde".
+_TIME_RE = re.compile(
+ rf"(?:a\s+)?la(?:s)?\s+"
+ rf"(?P<h>{_VAL})"
+ rf"(?:"
+ rf"[:.](?P<min>\d{{2}})"
+ rf"|\s+y\s+(?P<plus>{_VAL})"
+ rf"|\s+menos\s+(?P<minus>{_VAL})"
+ rf")?"
+ rf"(?:\s+de\s+la\s+(?P<period>mañana|tarde|noche|madrugada))?",
+ re.IGNORECASE,
+)
+
+_WEEKDAYS = r"lunes|martes|mi[ée]rcoles|jueves|viernes|s[áa]bado|domingo"
+
+# dateparser fails when a weekday is preceded by an article ("el viernes" ->
+# None, but "viernes" works). Strip the article in that position.
+_WEEKDAY_ARTICLE_RE = re.compile(rf"\b(?:el|la|los|las)\s+({_WEEKDAYS})\b", re.IGNORECASE)
+
+# dateparser also chokes on "próximo X" and "X que viene" — but a bare weekday
+# already resolves to the *next* one (PREFER_DATES_FROM=future). Normalise these.
+_NEXT_WEEKDAY_RE = re.compile(
+ rf"\b(?:pr[óo]xim[oa]\s+)?({_WEEKDAYS})(?:\s+que\s+viene)?\b", re.IGNORECASE
+)
+_RELATIVE_PHRASES = [
+ (re.compile(r"\b(?:la\s+)?(?:pr[óo]xima\s+semana|semana\s+que\s+viene)\b", re.I),
+ "en 7 días"),
+ (re.compile(r"\b(?:el\s+)?(?:pr[óo]ximo\s+mes|mes\s+que\s+viene)\b", re.I),
+ "en 30 días"),
+]
# Matches an explicit HH:MM clock time. A phrase with no such match carries no
# clock time, and parse_when defaults it to 09:00.
+_HAS_TIME_RE = re.compile(r"\b\d{1,2}:\d{2}\b")
+
+
+def _val(token: str | None) -> int | None:
+ if not token:
+ return None
+ token = token.lower()
+ if token.isdigit():
+ return int(token)
+ return _NUM.get(token)
+
+
def _normalize_es_time(phrase: str) -> str:
    """Rewrite colloquial Spanish time wording into forms dateparser accepts.

    Steps, in order: fixed expressions ("mediodía", "medianoche") become
    HH:MM; "próxima semana"/"próximo mes" style phrases become "en N días";
    weekday decorations ("el", "próximo", "que viene") are stripped; every
    clock time matched by ``_TIME_RE`` is replaced by an explicit "HH:MM".

    Returns the rewritten phrase; text that matches nothing is left as-is.
    """
    # Fixed expressions first.
    phrase = re.sub(r"\bmediod[íi]a\b", "12:00", phrase, flags=re.IGNORECASE)
    phrase = re.sub(r"\bmedianoche\b", "00:00", phrase, flags=re.IGNORECASE)
    for rx, repl_str in _RELATIVE_PHRASES:
        phrase = rx.sub(repl_str, phrase)
    # Collapse "próximo X" / "X que viene" BEFORE stripping the article:
    # otherwise "el próximo viernes" becomes "el viernes" and keeps the
    # article that dateparser cannot handle.
    phrase = _NEXT_WEEKDAY_RE.sub(r"\1", phrase)
    phrase = _WEEKDAY_ARTICLE_RE.sub(r"\1", phrase)

    def repl(m: re.Match) -> str:
        hour = _val(m.group("h"))
        if hour is None or hour > 24:
            # Not an hour (e.g. "media" = 30): leave the text untouched
            # rather than inventing a 23:00.
            return m.group(0)
        minute = 0
        if m.group("min"):
            minute = int(m.group("min"))
        elif m.group("plus"):  # "y media" / "y cuarto" / "y veinte"
            minute = _val(m.group("plus")) or 0
        elif m.group("minus"):  # "menos cuarto" / "menos diez"
            sub = _val(m.group("minus")) or 0
            hour = (hour - 1) % 24
            minute = (60 - sub) % 60

        period = (m.group("period") or "").lower()
        if period in ("tarde", "noche") and hour < 12:
            hour += 12
        elif period in ("mañana", "madrugada", "noche") and hour == 12:
            # "las doce de la noche" is midnight, not noon.
            hour = 0

        hour %= 24  # "las 24" means midnight
        minute = min(max(minute, 0), 59)
        return f"{hour:02d}:{minute:02d}"

    return _TIME_RE.sub(repl, phrase)
+
+
+_LAST_N_DAYS_RE = re.compile(r"\búltim[oa]s?\s+(\d{1,3})\s+d[íi]as?\b", re.IGNORECASE)
+
+
+def _add_month(d: datetime) -> datetime:
+ # d is always a day-1 date here, so no day overflow.
+ if d.month == 12:
+ return d.replace(year=d.year + 1, month=1)
+ return d.replace(month=d.month + 1)
+
+
+def _sub_month(d: datetime) -> datetime:
+ if d.month == 1:
+ return d.replace(year=d.year - 1, month=12)
+ return d.replace(month=d.month - 1)
+
+
def parse_date_range(text: str, tz) -> tuple[datetime, datetime] | None:
    """Find a Spanish time period in *text* and return its [start, end) window.

    Checked in this order, first hit wins: hoy, anteayer / antes de ayer,
    ayer, últimos N días, esta semana / semana actual, semana pasada,
    este mes / mes actual, mes pasado, este año, año pasado.
    Weeks start on Monday. Returns None when nothing matches.
    """
    lowered = text.lower()
    midnight = datetime.now(tz).replace(hour=0, minute=0, second=0, microsecond=0)

    def offset(days: int) -> datetime:
        return midnight + timedelta(days=days)

    def mentions(pattern: str) -> bool:
        return re.search(pattern, lowered) is not None

    if mentions(r"\bhoy\b"):
        return midnight, offset(1)
    if mentions(r"\banteayer\b|\bantes\s+de\s+ayer\b"):
        return offset(-2), offset(-1)
    if mentions(r"\bayer\b"):
        return offset(-1), midnight

    last_n = _LAST_N_DAYS_RE.search(lowered)
    if last_n:
        # The window runs from N days ago through the end of today.
        return offset(-int(last_n.group(1))), offset(1)

    week_start = midnight - timedelta(days=midnight.weekday())
    one_week = timedelta(days=7)
    if mentions(r"\b(?:esta\s+semana|semana\s+actual)\b"):
        return week_start, week_start + one_week
    if mentions(r"\b(?:la\s+)?semana\s+pasada\b"):
        return week_start - one_week, week_start

    month_start = midnight.replace(day=1)
    if mentions(r"\b(?:este\s+mes|mes\s+actual)\b"):
        return month_start, _add_month(month_start)
    if mentions(r"\b(?:el\s+)?mes\s+pasado\b"):
        return _sub_month(month_start), month_start

    year_start = midnight.replace(month=1, day=1)
    if mentions(r"\beste\s+a[ñn]o\b"):
        return year_start, year_start.replace(year=year_start.year + 1)
    if mentions(r"\b(?:el\s+)?a[ñn]o\s+pasado\b"):
        return year_start.replace(year=year_start.year - 1), year_start

    return None
+
+
def parse_when(phrase: str, tz) -> datetime | None:
    """Resolve a Spanish time phrase to a timezone-aware datetime in *tz*.

    The phrase is normalised, then handed to dateparser with a future bias.
    A result at exactly midnight that came from a phrase with no explicit
    HH:MM is moved to 09:00. When dateparser finds nothing but the phrase
    contains a valid bare HH:MM, the next occurrence of that time is used.

    Returns None for an empty phrase, an unparseable phrase, or an
    out-of-range clock time such as "24:00" or "9:75".
    """
    if not phrase:
        return None
    normalized = _normalize_es_time(phrase)
    settings = {
        "TIMEZONE": str(tz),
        "TO_TIMEZONE": str(tz),
        "RETURN_AS_TIMEZONE_AWARE": True,
        "PREFER_DATES_FROM": "future",
        "RELATIVE_BASE": datetime.now(tz),
    }
    dt = dateparser.parse(normalized, languages=["es"], settings=settings)
    if dt is not None:
        # A date with no explicit clock time resolves to midnight; a reminder at
        # 00:00 is rarely what you want, so default those to 09:00.
        if (dt.hour, dt.minute, dt.second) == (0, 0, 0) and not _HAS_TIME_RE.search(normalized):
            dt = dt.replace(hour=9)
        return dt

    # Fallback for a bare clock time with no date ("a la una", "a mediodía"):
    # dateparser returns None, so resolve to the next occurrence ourselves.
    m = _BARE_TIME_RE.search(normalized)
    if m is None:
        return None
    hour, minute = int(m.group(1)), int(m.group(2))
    if hour > 23 or minute > 59:
        # The regex accepts any 1-2 digit hour; datetime.replace would raise.
        return None
    now = datetime.now(tz)
    cand = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
    if cand <= now:
        cand += timedelta(days=1)
    return cand
--- /dev/null
+"""Voice-note transcription: Telegram OGG/Opus -> 16 kHz WAV (ffmpeg) -> text.
+
+Calls the whisper.cpp `whisper-cli` binary as a subprocess. We run it off the
+event loop with asyncio.to_thread so transcription doesn't block the bot.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import subprocess
+import tempfile
+
+
class Transcriber:
    """Turns an audio file into text with ffmpeg and the whisper.cpp CLI."""

    def __init__(self, whisper_bin: str, model_path: str, lang: str = "es"):
        self.whisper_bin = whisper_bin
        self.model_path = model_path
        self.lang = lang
        # One whisper thread per reported CPU; assume 4 when the count is unknown.
        self.threads = max(1, (os.cpu_count() or 4))

    def _run(self, audio_path: str) -> str:
        """Convert *audio_path* to WAV, transcribe it and return the text.

        Blocking. The intermediate WAV is removed afterwards whether or not
        the commands succeeded; a failing command raises CalledProcessError.
        """
        wav_path = audio_path + ".16k.wav"
        # Convert whatever Telegram sent into 16 kHz mono PCM WAV.
        to_wav = [
            "ffmpeg", "-nostdin", "-y", "-i", audio_path,
            "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", wav_path,
        ]
        # whisper.cpp: -np (no extra prints) -nt (no timestamps) -> stdout = text
        to_text = [
            self.whisper_bin,
            "-m", self.model_path,
            "-f", wav_path,
            "-l", self.lang,
            "-t", str(self.threads),
            "-np", "-nt",
        ]
        try:
            subprocess.run(to_wav, check=True, stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
            result = subprocess.run(to_text, check=True, capture_output=True,
                                    text=True)
            return result.stdout.strip()
        finally:
            try:
                os.remove(wav_path)
            except OSError:
                pass

    async def transcribe(self, audio_path: str) -> str:
        """Run the blocking transcription in a worker thread."""
        transcript = await asyncio.to_thread(self._run, audio_path)
        return transcript

    @staticmethod
    def tempfile(suffix: str = ".oga") -> str:
        """Create an empty temp file for an incoming voice note; return its path."""
        handle, path = tempfile.mkstemp(suffix=suffix, prefix="segismundo-voice-")
        os.close(handle)
        return path
--- /dev/null
+[Unit]
+Description=Segismundo — personal AI butler (Telegram + ollama + whisper.cpp)
+# Wait for network and the local ollama service.
+After=network-online.target ollama.service
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=segismundo
+Group=segismundo
+WorkingDirectory=/opt/segismundo
+EnvironmentFile=/opt/segismundo/.env
+ExecStart=/opt/segismundo/.venv/bin/python -m segismundo
+Restart=on-failure
+RestartSec=5
+
+# Hardening (relaxed enough for whisper temp files + sqlite under /var/lib).
+NoNewPrivileges=true
+PrivateTmp=true
+ProtectSystem=full
+ProtectHome=true
+ReadWritePaths=/var/lib/segismundo
+StateDirectory=segismundo
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+"""Tests for the Butler brain using a fake LLM — no ollama required.
+
+These verify the routing/formatting logic deterministically. The real LLM is
+exercised separately via scripts/chat-repl.py against a running ollama.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import pytest # noqa: E402
+from zoneinfo import ZoneInfo # noqa: E402
+
+from segismundo.core import Butler # noqa: E402
+from segismundo.db import Database # noqa: E402
+from segismundo.llm import Intent # noqa: E402
+
+TZ = ZoneInfo("Europe/Madrid")
+
+
class FakeLLM:
    """Stand-in LLM: returns a fixed intent and canned, deterministic replies."""

    def __init__(self, intent: Intent, answer_text="respuesta"):
        self._intent = intent
        self._answer = answer_text

    async def classify(self, text):
        # Always the scripted intent, whatever the message says.
        return self._intent

    async def embed(self, text):
        # No embeddings in tests.
        return None

    async def suggest_tags(self, content):
        return ["test"]

    async def answer(self, question, notes):
        # Include the note count so tests can see how many notes were passed in.
        count = len(notes)
        return f"{self._answer} ({count} notas)"

    async def chat(self, text):
        return f"chat:{text}"
+
+
def butler(intent, **kw):
    """Return ``(Butler, Database)`` wired to an in-memory DB and a FakeLLM
    scripted with *intent*; extra keyword arguments go to FakeLLM."""
    store = Database(":memory:")
    fake_llm = FakeLLM(intent, **kw)
    return Butler(store, fake_llm, TZ), store
+
+
async def test_note_is_saved():
    """A "note" intent stores the note and confirms it in the reply."""
    bot, store = butler(Intent(action="note", note_text="comprar leche"))
    answer = await bot.handle_message("apunta comprar leche")
    assert "Nota guardada" in answer
    newest = store.recent_notes()[0]
    assert newest.content == "comprar leche"
+
+
async def test_reminder_parsed_and_stored():
    """A "reminder" intent with a parseable time creates one pending reminder."""
    intent = Intent(action="reminder", reminder_text="llamar al médico",
                    when="mañana a las 9")
    bot, store = butler(intent)
    answer = await bot.handle_message("recuérdame mañana a las 9 llamar al médico")
    assert "Recordatorio" in answer
    pending = store.pending_reminders()
    assert len(pending) == 1
+
+
async def test_reminder_unparseable_falls_back_to_note():
    """A reminder whose time cannot be parsed is kept as a note instead."""
    intent = Intent(action="reminder", reminder_text="algo", when="qwerty")
    bot, store = butler(intent)
    answer = await bot.handle_message("recuérdame algo qwerty")
    assert "guardado como nota" in answer
    assert len(store.pending_reminders()) == 0
    assert len(store.recent_notes()) == 1
+
+
async def test_question_searches_notes():
    """A "question" intent is answered through the LLM answer path."""
    bot, store = butler(Intent(action="question", query="pasaporte"))
    store.add_note("renovar el pasaporte en marzo")
    answer = await bot.handle_message("¿cuándo renovar el pasaporte?")
    # FakeLLM.answer always ends its reply with "... notas)".
    assert "notas" in answer
+
+
async def test_interrogative_note_is_corrected_to_question():
    """An obvious question is answered even when the model labels it "note"."""
    wrong_label = Intent(action="note", note_text="contraseña del wifi")
    bot, store = butler(wrong_label)
    store.add_note("la wifi del hotel es Familia2024")
    answer = await bot.handle_message("¿cuál era la contraseña del wifi?")
    # Went through the Q&A path ...
    assert "notas" in answer
    # ... and did NOT store a new note.
    assert len(store.recent_notes()) == 1
+
+
async def test_long_rambling_dump_stored_verbatim_as_note():
    """A long brain-dump labelled "reminder" is kept verbatim as a note, so
    nothing is lost."""
    ramble = "vale pensando en voz alta " + ("sobre muchas cosas del jardín " * 12)
    mislabeled = Intent(action="reminder", reminder_text="algo", when="mañana")
    bot, store = butler(mislabeled)
    await bot.handle_message(ramble, source="voice")
    assert len(store.pending_reminders()) == 0
    saved = store.recent_notes()
    assert len(saved) == 1 and saved[0].content == ramble.strip()
+
+
async def test_short_note_prefix_is_stripped():
    """The "apunta que" prefix is removed from the stored note content."""
    bot, store = butler(Intent(action="note", note_text="ignored"))
    await bot.handle_message("apunta que el coche está en la plaza 47")
    newest = store.recent_notes()[0]
    assert newest.content == "el coche está en la plaza 47"
+
+
async def test_specific_question_mislabeled_as_list_is_answered():
    """A specific question labelled "list_notes" is answered, not listed."""
    bot, store = butler(Intent(action="list_notes"))
    store.add_note("el coche está en la plaza 47")
    answer = await bot.handle_message("¿dónde aparqué el coche?")
    # Went through Q&A (FakeLLM.answer), not the listing.
    assert "notas" in answer
+
+
async def test_genuine_list_request_still_lists():
    """A real request to list notes still produces the notes listing."""
    bot, store = butler(Intent(action="list_notes"))
    store.add_note("comprar pan")
    answer = await bot.handle_message("¿qué notas tengo?")
    assert "Notas recientes" in answer
+
+
async def test_export_topics_and_tag_browse():
    """Markdown export, the topic list and tag browsing all surface a tagged note."""
    bot, store = butler(Intent(action="note"))
    store.add_note("regar el huerto", tags="jardin riego")
    exported = bot.export_markdown()
    assert "regar el huerto" in exported and "jardin" in exported
    assert "jardin" in bot.list_topics()
    assert "regar el huerto" in bot.notes_for_tag("jardin")
    # A leading "#" on the tag is tolerated.
    assert "regar el huerto" in bot.notes_for_tag("#jardin")
+
+
async def test_date_range_recall_routes_to_range():
    """A question about "hoy" is answered from the notes in that date range."""
    bot, store = butler(Intent(action="list_notes"))
    store.add_note("nota de hoy")
    answer = await bot.handle_message("¿qué grabé hoy?")
    # FakeLLM.answer echoes the note count -> proves it went through _recall_range
    assert "1 notas" in answer
+
+
async def test_note_save_attaches_tags():
    """Saving a note stores the tags suggested by the LLM."""
    bot, store = butler(Intent(action="note", note_text="x"))
    await bot.handle_message("apunta comprar pan")
    newest = store.recent_notes()[0]
    # FakeLLM.suggest_tags -> ["test"]
    assert newest.tags == "test"
+
+
async def test_nl_delete_reminder():
    """A natural-language delete command removes the referenced reminder."""
    from datetime import datetime, timedelta, timezone
    bot, store = butler(Intent(action="chitchat"))
    tomorrow = datetime.now(timezone.utc) + timedelta(days=1)
    store.add_reminder("x", tomorrow)
    answer = await bot.handle_message("borra el recordatorio número uno")
    assert "borrad" in answer.lower()
    assert store.pending_reminders() == []
+
+
async def test_nl_delete_note_then_undo_restores():
    """Deleting a note by voice and then saying "deshaz" brings it back."""
    bot, store = butler(Intent(action="chitchat"))
    store.add_note("hola")  # id 1
    await bot.handle_message("quita la nota guardada número 1, bórrala")
    assert store.recent_notes() == []
    answer = await bot.handle_message("deshaz")
    assert "restaurad" in answer.lower()
    assert len(store.recent_notes()) == 1
+
+
async def test_undo_of_created_note_removes_it():
    """Undoing right after creating a note removes that note."""
    bot, store = butler(Intent(action="note", note_text="x"))
    await bot.handle_message("apunta comprar pan")
    assert len(store.recent_notes()) == 1
    await bot.handle_message("me he equivocado, deshaz")
    assert store.recent_notes() == []
+
+
async def test_delete_false_positive_is_kept_as_note():
    """ "apunta que … borrar …" is a note, not a delete command."""
    bot, store = butler(Intent(action="note", note_text="x"))
    await bot.handle_message("apunta que tengo que borrar los correos viejos")
    saved = store.recent_notes()
    assert len(saved) == 1
+
+
async def test_delete_last_note():
    """ "borra la última nota" removes only the most recent note."""
    bot, store = butler(Intent(action="chitchat"))
    for content in ("a", "b"):  # "b" is the most recent
        store.add_note(content)
    await bot.handle_message("borra la última nota")
    remaining = [note.content for note in store.recent_notes()]
    assert remaining == ["a"]
+
+
async def test_undo_done_reminder():
    """Undo after marking a reminder done makes it pending again."""
    from datetime import datetime, timedelta, timezone
    bot, store = butler(Intent(action="chitchat"))
    tomorrow = datetime.now(timezone.utc) + timedelta(days=1)
    reminder_id = store.add_reminder("x", tomorrow)
    bot.mark_reminder_done(reminder_id)
    assert store.pending_reminders() == []
    await bot.handle_message("deshaz")
    assert len(store.pending_reminders()) == 1
+
+
async def test_undo_nothing_to_undo():
    """Undo with an empty operation log says there is nothing to undo."""
    bot, _ = butler(Intent(action="chitchat"))
    answer = await bot.handle_message("deshaz")
    assert "nada que deshacer" in answer.lower()
+
+
async def test_append_to_note():
    """ "añade a la nota N …" appends to the note instead of replacing it."""
    bot, store = butler(Intent(action="chitchat"))
    store.add_note("pan, leche")  # id 1
    await bot.handle_message("añade a la nota 1 que también compre huevos")
    updated = store.get_note(1).content
    # Appended, not replaced.
    assert "huevos" in updated and "pan, leche" in updated
+
+
async def test_replace_note():
    """ "cambia la nota N a …" replaces the note's content."""
    bot, store = butler(Intent(action="chitchat"))
    store.add_note("viejo")  # id 1
    await bot.handle_message("cambia la nota 1 a contenido nuevo")
    updated = store.get_note(1)
    assert updated.content == "contenido nuevo"
+
+
async def test_edit_reminder_time():
    """Changing a reminder to "las cinco de la tarde" sets its local hour to 17."""
    from datetime import datetime, timedelta, timezone
    bot, store = butler(Intent(action="chitchat"))
    tomorrow = datetime.now(timezone.utc) + timedelta(days=1)
    store.add_reminder("cita", tomorrow)  # id 1
    await bot.handle_message("cambia el recordatorio 1 a las cinco de la tarde")
    stored_due = store.get_reminder(1).due_at
    local_due = datetime.fromisoformat(stored_due).astimezone(TZ)
    assert local_due.hour == 17
+
+
async def test_undo_edit_note_restores_content():
    """Undo after editing a note restores its previous content."""
    bot, store = butler(Intent(action="chitchat"))
    store.add_note("original")  # id 1
    await bot.handle_message("cambia la nota 1 a modificado")
    edited = store.get_note(1)
    assert edited.content == "modificado"
    await bot.handle_message("deshaz")
    restored = store.get_note(1)
    assert restored.content == "original"
+
+
async def test_borrador_word_does_not_trigger_delete():
    """ "borrador" (draft) contains "borra" but must not delete anything."""
    bot, store = butler(Intent(action="chitchat"))
    store.add_note("x")  # id 1
    await bot.handle_message("añade a la nota 1 revisar el borrador del informe")
    assert store.get_note(1) is not None
    assert "borrador" in store.get_note(1).content
+
+
async def test_anade_una_nota_creates_note_not_edit():
    """ "añade una nota que diga que …" is note CREATION, not an append to
    note #N: the "6" / "una" in the body must not be read as a note id."""
    bot, store = butler(Intent(action="note", note_text="ignored"))
    message = (
        "Añade una nota que diga que Rebecca quiere un paquete de 6 bricks de "
        "leche de marca agaza que sea sin lactosa y semidesnatada."
    )
    answer = await bot.handle_message(message)
    assert "Nota guardada" in answer
    assert "No encontré" not in answer
    saved = store.recent_notes()
    assert len(saved) == 1
    assert saved[0].content.startswith("Rebecca quiere un paquete de 6 bricks")
+
+
async def test_garbled_anade_note_wrapper_is_stripped():
    """Whisper mishears "Añade una" as "Allá de una"; the wrapper is still
    stripped so the note reads cleanly."""
    bot, store = butler(Intent(action="note", note_text="ignored"))
    garbled = "Allá de una nota que diga que Rebecca quiere leche sin lactosa."
    await bot.handle_message(garbled)
    newest = store.recent_notes()[0]
    assert newest.content == "Rebecca quiere leche sin lactosa."
+
+
+async def test_append_to_note_still_works_after_create_guard():
+    """A genuine "añade a la nota 1 ..." keeps the old text and adds the new."""
+    # Guards against the note-creation check swallowing real appends.
+    bot, store = butler(Intent(action="chitchat"))
+    store.add_note("pan, leche")  # id 1
+    await bot.handle_message("añade a la nota 1 que también compre huevos")
+    merged = store.get_note(1).content
+    assert "huevos" in merged
+    assert "pan, leche" in merged
+
+
+async def test_context_detected_on_note():
+    """A note mentioning Madrid is stored with context "madrid"."""
+    bot, store = butler(Intent(action="note", note_text="x"))
+    await bot.handle_message("apunta que el parking está en la calle Goya en Madrid")
+    newest = store.recent_notes()[0]
+    assert newest.context == "madrid"
+
+
+async def test_context_detected_on_reminder():
+    """A reminder mentioning Burgos is stored with context "burgos"."""
+    intent = Intent(
+        action="reminder",
+        reminder_text="llamar al fontanero",
+        when="mañana a las 9",
+    )
+    bot, store = butler(intent)
+    await bot.handle_message("recuérdame mañana a las 9 llamar al fontanero en Burgos")
+    pending = store.pending_reminders()
+    assert len(pending) == 1
+    assert pending[0].context == "burgos"
+
+
+async def test_no_context_is_general():
+    """A note that names no known context is filed under "general"."""
+    bot, store = butler(Intent(action="note", note_text="x"))
+    await bot.handle_message("apunta comprar pan")
+    newest = store.recent_notes()[0]
+    assert newest.context == "general"
+
+
+async def test_list_reminders_filtered_by_context():
+    """Asking for reminders "en Madrid" lists only the madrid-context one."""
+    from datetime import datetime, timedelta, timezone
+
+    bot, store = butler(Intent(action="list_reminders"))
+    one_day = timedelta(days=1)
+    store.add_reminder("paquete", datetime.now(timezone.utc) + one_day, context="madrid")
+    store.add_reminder("fontanero", datetime.now(timezone.utc) + one_day, context="burgos")
+    listing = await bot.handle_message("¿qué recordatorios tengo en Madrid?")
+    assert "paquete" in listing
+    assert "fontanero" not in listing
+
+
+async def test_move_note_to_context():
+    """ "mueve la nota 1 a Madrid" changes the note's context to "madrid"."""
+    bot, store = butler(Intent(action="chitchat"))
+    store.add_note("x")  # general, id 1
+    await bot.handle_message("mueve la nota 1 a Madrid")
+    moved = store.get_note(1)
+    assert moved.context == "madrid"
+
+
+async def test_cambia_contexto_moves_not_edits():
+    """ "cambia el contexto de la nota ..." moves the note; content is untouched."""
+    bot, store = butler(Intent(action="chitchat"))
+    store.add_note("contenido importante")  # id 1
+    await bot.handle_message("cambia el contexto de la nota 1 a burgos")
+    assert store.get_note(1).context == "burgos"
+    # The "cambia" verb must not overwrite the note body.
+    assert store.get_note(1).content == "contenido importante"
+
+
+async def test_move_to_general_and_undo():
+    """Moving note 1 to "general" and then undoing restores "madrid"."""
+    bot, store = butler(Intent(action="chitchat"))
+    store.add_note("x", context="madrid")  # id 1
+    await bot.handle_message("mueve la nota 1 a general")
+    after_move = store.get_note(1)
+    assert after_move.context == "general"
+    await bot.handle_message("deshaz")
+    after_undo = store.get_note(1)
+    assert after_undo.context == "madrid"
+
+
+async def test_chitchat_uses_chat():
+    """A chitchat intent produces a reply that starts with "chat:"."""
+    bot, _store = butler(Intent(action="chitchat"))
+    answer = await bot.handle_message("¿estás funcionando?")
+    assert answer.startswith("chat:")
--- /dev/null
+"""Offline tests for the storage layer — no ollama/Telegram needed."""
+
+import os
+import sys
+from datetime import datetime, timedelta, timezone
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from segismundo.db import Database # noqa: E402
+
+
+def _db(tmp_path):
+    """Open a Database stored as ``test.db`` inside *tmp_path*."""
+    db_file = tmp_path / "test.db"
+    return Database(str(db_file))
+
+
+def test_add_and_recent_notes(tmp_path):
+    """recent_notes returns every stored note, newest first."""
+    store = _db(tmp_path)
+    store.add_note("comprar pan", source="text")
+    store.add_note("llamar a mamá", source="voice")
+    latest = store.recent_notes(limit=10)
+    assert len(latest) == 2
+    # The note added last comes back at index 0.
+    assert latest[0].content == "llamar a mamá"
+
+
+def test_fts_search_diacritics(tmp_path):
+    """Full-text search matches whole words and ignores accents.
+
+    The first note has no accented characters, so on its own it never
+    exercised the accent folding this test is named after. The second note
+    stores accented words and is queried without accents.
+    """
+    db = _db(tmp_path)
+    db.add_note("Tengo que renovar el pasaporte en marzo")
+    db.add_note("Llamar a mamá por teléfono")
+    # Plain single-term and multi-term queries against unaccented content.
+    hits = db.search_fts("pasaporte")
+    assert len(hits) == 1
+    hits2 = db.search_fts("renovar marzo")
+    assert len(hits2) == 1
+    # remove_diacritics 2 => accent-insensitive match: unaccented queries
+    # must find the accented words of the second note.
+    # NOTE(review): relies on the FTS index being built with
+    # remove_diacritics, as the comment above states; confirm in db.py.
+    assert [n.content for n in db.search_fts("telefono")] == ["Llamar a mamá por teléfono"]
+    assert [n.content for n in db.search_fts("mama")] == ["Llamar a mamá por teléfono"]
+
+
+def test_semantic_search(tmp_path):
+    """search_semantic ranks the note whose embedding is closest to the query."""
+    store = _db(tmp_path)
+    store.add_note("idea de regalo: auriculares", embedding=[1.0, 0.0, 0.0])
+    store.add_note("receta de tortilla", embedding=[0.0, 1.0, 0.0])
+    query_vector = [0.9, 0.1, 0.0]  # nearly parallel to the first note's vector
+    best = store.search_semantic(query_vector, limit=1)
+    assert best[0].content == "idea de regalo: auriculares"
+
+
+def test_tags_browse(tmp_path):
+    """Notes can be looked up by any one of their tags; all_tags gives counts."""
+    store = _db(tmp_path)
+    store.add_note("regar el huerto", tags="jardin riego")
+    store.add_note("comprar pan", tags="compras")
+    for tag in ("jardin", "riego"):
+        assert [n.content for n in store.notes_by_tag(tag)] == ["regar el huerto"]
+    counts = dict(store.all_tags())
+    assert counts["jardin"] == 1
+    assert counts["compras"] == 1
+    # A tag must match as a whole token; a prefix like "jard" finds nothing.
+    assert store.notes_by_tag("jard") == []
+
+
+def test_notes_in_range(tmp_path):
+    """notes_in_range finds a fresh note in a window around now, not a later one."""
+    store = _db(tmp_path)
+    store.add_note("x")
+    now = datetime.now(timezone.utc)
+    hour = timedelta(hours=1)
+    around_now = store.notes_in_range(now - hour, now + hour)
+    assert len(around_now) == 1
+    strictly_later = store.notes_in_range(now + hour, now + timedelta(hours=2))
+    assert len(strictly_later) == 0
+
+
+def test_migration_adds_tags_column(tmp_path):
+    """Opening a pre-``tags`` database through Database yields notes with empty tags."""
+    import sqlite3
+
+    # Hand-build an old-style DB whose notes table has no tags column.
+    legacy_path = str(tmp_path / "old.db")
+    create_sql = (
+        "CREATE TABLE notes(id INTEGER PRIMARY KEY, content TEXT NOT NULL, "
+        "source TEXT, created_at TEXT NOT NULL, embedding BLOB)"
+    )
+    insert_sql = "INSERT INTO notes(content, source, created_at) VALUES('viejo','text','2020-01-01T00:00:00+00:00')"
+    raw = sqlite3.connect(legacy_path)
+    raw.execute(create_sql)
+    raw.execute(insert_sql)
+    raw.commit()
+    raw.close()
+
+    store = Database(legacy_path)  # _migrate() should add the column
+    rows = store.recent_notes()
+    assert rows[0].content == "viejo"
+    assert rows[0].tags == ""
+
+
+def test_context_filtering(tmp_path):
+    """recent_notes and pending_reminders honour the optional context filter."""
+    store = _db(tmp_path)
+    store.add_note("a", context="madrid")
+    store.add_note("b", context="burgos")
+    store.add_note("c")  # general
+    madrid_notes = store.recent_notes(context="madrid")
+    assert [n.content for n in madrid_notes] == ["a"]
+    general_notes = store.recent_notes(context="general")
+    assert [n.content for n in general_notes] == ["c"]
+    # No filter -> every note comes back.
+    assert len(store.recent_notes()) == 3
+
+    one_day = timedelta(days=1)
+    store.add_reminder("r1", datetime.now(timezone.utc) + one_day, context="madrid")
+    store.add_reminder("r2", datetime.now(timezone.utc) + one_day, context="burgos")
+    madrid_reminders = store.pending_reminders(context="madrid")
+    assert [r.text for r in madrid_reminders] == ["r1"]
+    assert len(store.pending_reminders()) == 2
+
+
+def test_update_note(tmp_path):
+    """update_note reports success and replaces both content and tags."""
+    store = _db(tmp_path)
+    note_id = store.add_note("a", tags="t1")
+    updated = store.update_note(note_id, "b", None, "t2")
+    assert updated
+    reloaded = store.get_note(note_id)
+    assert reloaded.content == "b"
+    assert reloaded.tags == "t2"
+
+
+def test_update_reminder_snooze(tmp_path):
+    """update_reminder stores a new due time (within 2 s of the requested one)."""
+    store = _db(tmp_path)
+    reminder_id = store.add_reminder("x", datetime.now(timezone.utc) + timedelta(days=1))
+    snoozed_to = datetime.now(timezone.utc) + timedelta(hours=2)
+    changed = store.update_reminder(reminder_id, due_at_utc=snoozed_to, clear_notified=True)
+    assert changed
+    reloaded = store.get_reminder(reminder_id)
+    drift = datetime.fromisoformat(reloaded.due_at) - snoozed_to
+    assert abs(drift.total_seconds()) < 2
+
+
+def test_soft_deleted_reminder_not_editable(tmp_path):
+    """After soft_delete_reminder the reminder can be neither fetched nor updated."""
+    store = _db(tmp_path)
+    reminder_id = store.add_reminder("x", datetime.now(timezone.utc) + timedelta(days=1))
+    store.soft_delete_reminder(reminder_id)
+    # Soft-deleted rows are filtered out of lookups.
+    assert store.get_reminder(reminder_id) is None
+    edit_result = store.update_reminder(reminder_id, text="y")
+    assert edit_result is False
+
+
+def test_reminders_lifecycle(tmp_path):
+    """Walk a reminder through due -> notified -> done and check each listing."""
+    store = _db(tmp_path)
+    five_min_ago = datetime.now(timezone.utc) - timedelta(minutes=5)
+    tomorrow = datetime.now(timezone.utc) + timedelta(days=1)
+    overdue_id = store.add_reminder("revisar correo", five_min_ago)
+    store.add_reminder("dentista", tomorrow)
+
+    # Only the reminder whose time has passed is due.
+    due_now = store.due_reminders(datetime.now(timezone.utc))
+    assert len(due_now) == 1
+    assert due_now[0].id == overdue_id
+
+    # Once marked notified it is no longer reported as due.
+    store.mark_notified(overdue_id)
+    assert store.due_reminders(datetime.now(timezone.utc)) == []
+
+    # Both stay pending until one is explicitly marked done.
+    assert len(store.pending_reminders()) == 2
+    assert store.mark_done(overdue_id) is True
+    assert len(store.pending_reminders()) == 1
+
+
+if __name__ == "__main__":
+    # Quick smoke run without pytest: executes a subset of the tests above.
+    import tempfile
+    from pathlib import Path
+
+    # Every smoke test gets its own TemporaryDirectory, which is removed on
+    # exit; the previous tempfile.mkdtemp() calls left their directories
+    # (and the SQLite files inside) behind on each run.
+    for smoke_test in (
+        test_add_and_recent_notes,
+        test_fts_search_diacritics,
+        test_semantic_search,
+        test_reminders_lifecycle,
+    ):
+        with tempfile.TemporaryDirectory() as d:
+            smoke_test(Path(d))
+    print("OK — db smoke tests passed. For the full suite run: pytest -q")
--- /dev/null
+"""Tests for the HTTP morning endpoint and the reminder scheduler (no ollama)."""
+
+import asyncio
+import os
+import sys
+from datetime import datetime, timedelta, timezone
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import pytest # noqa: E402
+from aiohttp.test_utils import TestClient, TestServer # noqa: E402
+from zoneinfo import ZoneInfo # noqa: E402
+
+from segismundo.core import Butler # noqa: E402
+from segismundo.db import Database # noqa: E402
+from segismundo.http_api import build_app # noqa: E402
+from segismundo.reminders import scheduler_loop # noqa: E402
+
+TZ = ZoneInfo("Europe/Madrid")
+
+
+class StubLLM:
+    """Offline stand-in for the LLM client: no embeddings, no tag suggestions."""
+
+    async def embed(self, text):
+        """Return ``None`` for any *text*."""
+        return None
+
+    async def suggest_tags(self, content):
+        """Return an empty tag list for any *content*."""
+        no_tags = []
+        return no_tags
+
+
+def _butler():
+    """Build a Butler over an in-memory Database; return ``(butler, db)``."""
+    store = Database(":memory:")
+    assistant = Butler(store, StubLLM(), TZ)
+    return assistant, store
+
+
+async def test_morning_endpoint_requires_token():
+    """/morning answers 401 without the bearer token and 200 (plus one push) with it."""
+    assistant, _store = _butler()
+    pushed = []
+
+    async def send(msg):
+        pushed.append(msg)
+
+    app = build_app(assistant, send, token="secret")
+    async with TestClient(TestServer(app)) as client:
+        # No token -> 401
+        anonymous = await client.get("/morning")
+        assert anonymous.status == 401
+        # Correct token -> 200 + pushed to Telegram
+        bearer = {"Authorization": "Bearer secret"}
+        authorised = await client.get("/morning", headers=bearer)
+        assert authorised.status == 200
+        payload = await authorised.json()
+        assert "text" in payload
+        assert "overdue" in payload
+        assert len(pushed) == 1
+
+
+async def test_morning_endpoint_lists_reminders():
+    """/morning reports a reminder whose due time has passed under "overdue"."""
+    assistant, store = _butler()
+    an_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
+    store.add_reminder("pagar el alquiler", an_hour_ago)
+
+    async def send(_msg):
+        pass
+
+    # token=None: the endpoint is called below without any credentials.
+    app = build_app(assistant, send, token=None)
+    async with TestClient(TestServer(app)) as client:
+        resp = await client.get("/morning?push=0")
+        payload = await resp.json()
+        overdue = payload["overdue"]
+        assert len(overdue) == 1
+        assert overdue[0]["text"] == "pagar el alquiler"
+
+
+async def test_context_endpoint():
+    """/context/<name> needs the token, returns only that context, 404s on unknown."""
+    assistant, store = _butler()
+    one_day = timedelta(days=1)
+    store.add_reminder("paquete", datetime.now(timezone.utc) + one_day, context="madrid")
+    store.add_note("parking goya", context="madrid")
+    store.add_reminder("fontanero", datetime.now(timezone.utc) + one_day, context="burgos")
+
+    async def send(_msg):
+        pass
+
+    app = build_app(assistant, send, token="secret")
+    async with TestClient(TestServer(app)) as client:
+        anonymous = await client.get("/context/madrid")
+        assert anonymous.status == 401  # no token
+        resp = await client.get(
+            "/context/madrid?push=0",
+            headers={"Authorization": "Bearer secret"},
+        )
+        assert resp.status == 200
+        payload = await resp.json()
+        assert payload["context"] == "madrid"
+        reminders = payload["reminders"]
+        assert any("paquete" in r["text"] for r in reminders)
+        assert not any("fontanero" in r["text"] for r in reminders)
+        assert any("parking" in n["content"] for n in payload["notes"])
+        # unknown context -> 404 (token supplied in the query string this time)
+        unknown = await client.get("/context/sevilla?token=secret")
+        assert unknown.status == 404
+
+
+async def test_scheduler_fires_due_reminder():
+    """scheduler_loop notifies an already-due reminder and leaves none due."""
+    _assistant, store = _butler()
+    a_minute_ago = datetime.now(timezone.utc) - timedelta(minutes=1)
+    store.add_reminder("sacar la basura", a_minute_ago)
+    delivered = []
+
+    async def notify(rem):
+        delivered.append(rem.text)
+
+    # Run the loop briefly with a fast poll interval, then cancel it.
+    loop_task = asyncio.create_task(scheduler_loop(store, notify, TZ, poll_seconds=0.05))
+    await asyncio.sleep(0.2)
+    loop_task.cancel()
+    try:
+        await loop_task
+    except asyncio.CancelledError:
+        pass
+
+    assert any("sacar la basura" in text for text in delivered)
+    # Marked notified so it won't fire again.
+    still_due = store.due_reminders(datetime.now(timezone.utc))
+    assert still_due == []
--- /dev/null
+"""Pure-function tests for llm helpers (no ollama needed)."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from segismundo.llm import _soften_tense # noqa: E402
+
+
+def test_soften_tense_past_to_present():
+    """Past-tense question verbs (era/eran/estaba) are rewritten to present tense."""
+    expected_by_input = {
+        "¿cuál era la clave?": "¿cuál es la clave?",
+        "¿cuáles eran mis notas?": "¿cuáles son mis notas?",
+        "¿dónde estaba el coche?": "¿dónde está el coche?",
+    }
+    for question, softened in expected_by_input.items():
+        assert _soften_tense(question) == softened
+
+
+def test_soften_tense_leaves_present_alone():
+    """A question already in the present tense comes back unchanged."""
+    question = "¿cuál es la clave?"
+    assert _soften_tense(question) == "¿cuál es la clave?"
--- /dev/null
+"""Tests for Spanish time-phrase normalization + parsing (no network needed)."""
+
+import os
+import sys
+from datetime import datetime
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from zoneinfo import ZoneInfo # noqa: E402
+
+from datetime import timedelta # noqa: E402
+
+from segismundo.timeparse import ( # noqa: E402
+ _normalize_es_time,
+ parse_date_range,
+ parse_when,
+)
+
+TZ = ZoneInfo("Europe/Madrid")
+
+
+def test_normalize_variants():
+    """Digit-based Spanish time phrases normalize to HH:MM form."""
+    expected_by_phrase = {
+        "mañana a las 9": "mañana 09:00",
+        "a las 9 y media": "09:30",
+        "a las 9 y cuarto": "09:15",
+        "a las 9:45": "09:45",
+        "a las 5 de la tarde": "17:00",
+        # leading article before a weekday is stripped (dateparser quirk)
+        "el viernes a la 1": "viernes 01:00",
+    }
+    for phrase, normalized in expected_by_phrase.items():
+        assert _normalize_es_time(phrase) == normalized
+
+
+def test_parse_when_sets_correct_hour():
+    """ "mañana a las 9" parses to 09:00."""
+    parsed = parse_when("mañana a las 9", TZ)
+    assert parsed is not None
+    assert parsed.hour == 9
+    assert parsed.minute == 0
+
+
+def test_parse_when_afternoon():
+    """ "de la tarde" shifts 5 o'clock to hour 17."""
+    parsed = parse_when("hoy a las 5 de la tarde", TZ)
+    assert parsed is not None
+    assert parsed.hour == 17
+
+
+def test_parse_when_weekday_with_article():
+    """A weekday preceded by "el" still parses, with the afternoon hour applied."""
+    # "el viernes ..." used to defeat dateparser; the article is now stripped.
+    parsed = parse_when("el viernes a las 5 de la tarde", TZ)
+    assert parsed is not None
+    assert parsed.hour == 17
+
+
+def test_parse_when_relative():
+    """ "en 2 horas" resolves to a moment later than the time taken just before."""
+    before_call = datetime.now(TZ)
+    parsed = parse_when("en 2 horas", TZ)
+    assert parsed is not None
+    assert parsed > before_call
+
+
+def test_parse_when_empty():
+    """An empty phrase yields ``None``."""
+    parsed = parse_when("", TZ)
+    assert parsed is None
+
+
+def test_normalize_spelled_out_numbers():
+    """Hours written as words normalize the same way as digits."""
+    expected_by_phrase = {
+        "a la una": "01:00",
+        "a las nueve y media": "09:30",
+        "a las cinco menos cuarto": "04:45",
+        "a las once de la noche": "23:00",
+    }
+    for phrase, normalized in expected_by_phrase.items():
+        assert _normalize_es_time(phrase) == normalized
+
+
+def test_parse_spelled_out_with_day():
+    """A weekday plus a spelled-out time parses to Friday 09:30."""
+    parsed = parse_when("el viernes a las nueve y media", TZ)
+    assert parsed is not None
+    assert parsed.weekday() == 4  # Friday
+    assert (parsed.hour, parsed.minute) == (9, 30)
+
+
+def test_parse_bare_time_resolves_to_next_occurrence():
+    """A time with no date ("a la una") lands on a future 01:00."""
+    parsed = parse_when("a la una", TZ)
+    assert parsed is not None
+    assert (parsed.hour, parsed.minute) == (1, 0)
+    assert parsed > datetime.now(TZ)
+
+
+def test_parse_next_weekday_phrasings():
+    """Several ways of saying "Tuesday" all parse to a Tuesday at 09:00."""
+    phrasings = ("el martes que viene", "próximo martes", "el martes")
+    for phrase in phrasings:
+        parsed = parse_when(phrase, TZ)
+        assert parsed is not None
+        assert parsed.weekday() == 1  # Tuesday
+        # date-only defaults to 09:00
+        assert parsed.hour == 9
+
+
+def test_medianoche_stays_midnight():
+    """ "a medianoche" parses to 00:00."""
+    parsed = parse_when("a medianoche", TZ)
+    assert parsed is not None
+    assert (parsed.hour, parsed.minute) == (0, 0)
+
+
+def test_date_range_ayer():
+    """ "ayer" maps to a one-day range that starts yesterday and ends today."""
+    window = parse_date_range("¿qué grabé ayer?", TZ)
+    assert window is not None
+    range_start, range_end = window
+    assert (range_end - range_start) == timedelta(days=1)
+    # NOTE(review): "now" is sampled after the call, so a run that straddles
+    # local midnight could compare against the wrong day.
+    today = datetime.now(TZ)
+    assert range_end.date() == today.date()
+    yesterday = today - timedelta(days=1)
+    assert range_start.date() == yesterday.date()
+
+
+def test_date_range_semana_pasada():
+    """ "la semana pasada" maps to a range exactly seven days long."""
+    window = parse_date_range("mis notas de la semana pasada", TZ)
+    assert window is not None
+    range_start, range_end = window[0], window[1]
+    assert (range_end - range_start) == timedelta(days=7)
+
+
+def test_date_range_ultimos_n_dias():
+    """ "los últimos 3 días" maps to a four-day range."""
+    window = parse_date_range("¿qué apunté los últimos 3 días?", TZ)
+    assert window is not None
+    range_start, range_end = window[0], window[1]
+    # 3 days back + today
+    assert (range_end - range_start) == timedelta(days=4)
+
+
+def test_date_range_none_when_no_period():
+    """A question that names no time period yields ``None``."""
+    window = parse_date_range("¿cuál es la contraseña del wifi?", TZ)
+    assert window is None