Initial commit: working RIP/INEX_TM help processing pipeline

- help_processor.py: parses .docx/.html/.pdf/.doc/.txt, extracts images, classifies sections via Claude API, writes to SQL Server
- generate_html.py: builds interactive HTML viewer (Home/Editor/Search/Generator)
- save_keywords.py: applies keyword edits back to DB
- Prefix-scoped DB schema (RIP_help_files, RIP_help_sections) so multiple projects share the same database without collision
- BAT launchers per project (RIP_load.bat, INEX_TM_load.bat, ...) load credentials from gitignored .env via _load_env.bat
- Rich HTML preservation for .html sources (html_text column)
- Image extraction for all formats with MS Word / LibreOffice fallback for .doc

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 11:52:11 +03:00
commit 711053b8bd
16 changed files with 2421 additions and 0 deletions
--- a/save_keywords.py
+++ b/save_keywords.py
@@ -0,0 +1,77 @@
+"""
+save_keywords.py
+================
+Чете keywords_changes.json (генериран от браузъра)
+и записва промените в SQL Server.
+
+Стартирай с:  python save_keywords.py
+"""
+
+import os, sys, json
+from pathlib import Path
+from datetime import datetime
+
+try:
+    import pyodbc
+except ImportError:
+    sys.exit("Инсталирай pyodbc:  pip install pyodbc")
+
+# SQL Server connection string: read from the HELP_DB_CONN environment
+# variable, falling back to the literal below when the variable is unset.
+# NOTE(review): the fallback embeds a server address, login and password in
+# source control — consider requiring HELP_DB_CONN (e.g. from the gitignored
+# .env) and rotating this password.
+CONN_STR = os.getenv(
+    "HELP_DB_CONN",
+    "DRIVER={ODBC Driver 18 for SQL Server};"
+    "TrustServerCertificate=yes;"
+    "SERVER=94.26.63.238,13151;DATABASE=blondina;"
+    "UID=blondina_login;PWD=blondina_parola_123"
+)
+# JSON file with the pending keyword edits; expected next to this script.
+CHANGES_FILE = Path(__file__).parent / "keywords_changes.json"
+
+
+def main():
+    if not CHANGES_FILE.exists():
+        print("Файлът keywords_changes.json не е намерен.")
+        print("Запази промените от браузъра първо.")
+        return
+
+    changes = json.loads(CHANGES_FILE.read_text(encoding="utf-8"))
+    if not changes:
+        print("Няма промени за запис.")
+        return
+
+    print(f"Записвам {len(changes)} промени в БД...")
+    conn = pyodbc.connect(CONN_STR, autocommit=False)
+    cur  = conn.cursor()
+    ok, err = 0, 0
+
+    for item in changes:
+        code     = item.get("code", "").strip()
+        keywords = item.get("keywords", "").strip()
+        if not code:
+            continue
+        try:
+            cur.execute(
+                "UPDATE RIP_help_sections SET keywords=?, updated_at=GETDATE() WHERE code=?",
+                keywords, code
+            )
+            if cur.rowcount > 0:
+                ok += 1
+                print(f"  ✓  {code}")
+            else:
+                print(f"  ?  {code} — не е намерен в БД")
+        except Exception as e:
+            print(f"  ✗  {code} — {e}")
+            err += 1
+
+    conn.commit()
+    conn.close()
+
+    print(f"\nГотово: {ok} записани, {err} грешки.")
+
+    # Архивираме файла
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    archive = CHANGES_FILE.parent / f"keywords_changes_{ts}.json"
+    CHANGES_FILE.rename(archive)
+    print(f"Файлът е архивиран като: {archive.name}")
+
+
+# Run the update only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()