#!/usr/bin/env python3
"""Push every ``*.txt`` file below a corpus root to a memory HTTP API.

Each file is read as text and POSTed to ``<memory-url>/store``.  The API
base URL comes from ``--memory`` or, when that option is omitted, from the
``memory.url`` entry of a ``sources.yml`` file located next to this script.
"""
import argparse
import json
import os
import pathlib
import sys

import requests


def iter_texts(root):
    """Yield the path of every regular ``*.txt`` file below *root*.

    Args:
        root: Directory to search recursively (string or path-like).

    Yields:
        ``pathlib.Path`` objects, in the order ``rglob`` produces them.
    """
    for p in pathlib.Path(root).rglob("*.txt"):
        # rglob matches on the name only, so a directory called "x.txt"
        # would be yielded too; only regular files can be read as text.
        if p.is_file():
            yield p


def store(memory_url, collection, text, meta):
    """POST one document to the ``/store`` endpoint of the memory API.

    Args:
        memory_url: Base URL of the API; a trailing slash is tolerated.
        collection: Target collection name.
        text: Document body to store.
        meta: Mapping from which the ``source`` and ``path`` entries are
            forwarded as metadata (missing entries are sent as ``None``).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    # NOTE(review): `collection` is accepted but not part of the payload.
    # The server-side schema is not visible here, so the request body is
    # left unchanged -- confirm whether the API expects a collection field.
    payload = {
        "text": text,
        "metadata": {"source": meta.get("source"), "path": meta.get("path")},
    }
    # Strip a trailing slash so "http://host:8085/" does not become "//store".
    base_url = str(memory_url).rstrip("/")
    r = requests.post(f"{base_url}/store", json=payload, timeout=30)
    r.raise_for_status()
    return r.json()


def _memory_url_from_config():
    """Return ``memory.url`` from the adjacent ``sources.yml``, or ``None``."""
    conf = pathlib.Path(__file__).with_name("sources.yml")
    if not conf.exists():
        return None
    # Imported lazily: PyYAML is only needed when --memory is not given.
    import yaml

    cfg = yaml.safe_load(conf.read_text(encoding="utf-8"))
    # safe_load returns None for an empty file, and a bare "memory:" key
    # maps to None; treat both as "not configured" instead of crashing.
    if not isinstance(cfg, dict):
        return None
    memory_cfg = cfg.get("memory")
    if not isinstance(memory_cfg, dict):
        return None
    return memory_cfg.get("url")


def main():
    """Parse the command line and upload every text file of the corpus.

    Exits with status 1 when no memory URL can be determined.  A failure on
    an individual file is reported on stderr and does not stop the run.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--root", required=True, help="Corpus-Root (z.B. /srv/ai/corpus)")
    ap.add_argument(
        "--memory",
        required=False,
        default=None,
        help="Memory-API URL (z.B. http://127.0.0.1:8085)",
    )
    ap.add_argument("--collection", default="chat-memory")
    args = ap.parse_args()

    # Optional: read the memory URL from sources.yml
    if not args.memory:
        args.memory = _memory_url_from_config()

    if not args.memory:
        print("Bitte --memory angeben oder in sources.yml hinterlegen.", file=sys.stderr)
        sys.exit(1)

    for p in iter_texts(args.root):
        # Deliberately broad: this is the per-file boundary of a best-effort
        # batch job, so any failure is reported and the next file is tried.
        try:
            # Decode as UTF-8 regardless of the platform locale; undecodable
            # bytes are dropped, as before.
            text = p.read_text(encoding="utf-8", errors="ignore")
            meta = {"path": str(p), "source": "crawler"}
            store(args.memory, args.collection, text, meta)
            print("✔ stored", p)
        except Exception as e:
            print("✖", p, e, file=sys.stderr)


if __name__ == "__main__":
    main()