Initial commit: working RIP/INEX_TM help processing pipeline

- help_processor.py: parses .docx/.html/.pdf/.doc/.txt, extracts images,
  classifies sections via Claude API, writes to SQL Server
- generate_html.py: builds interactive HTML viewer (Home/Editor/Search/Generator)
- save_keywords.py: applies keyword edits back to DB
- Prefix-scoped DB schema (RIP_help_files, RIP_help_sections) so multiple
  projects share the same database without collision
- BAT launchers per project (RIP_load.bat, INEX_TM_load.bat, ...) load
  credentials from gitignored .env via _load_env.bat
- Rich HTML preservation for .html sources (html_text column)
- Image extraction for all formats with MS Word / LibreOffice fallback for .doc

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 11:52:11 +03:00
commit 711053b8bd
16 changed files with 2421 additions and 0 deletions

77
save_keywords.py Normal file
View File

@@ -0,0 +1,77 @@
"""
save_keywords.py
================
Чете keywords_changes.json (генериран от браузъра)
и записва промените в SQL Server.
Стартирай с: python save_keywords.py
"""
import os, sys, json
from pathlib import Path
from datetime import datetime
try:
import pyodbc
except ImportError:
sys.exit("Инсталирай pyodbc: pip install pyodbc")
# ODBC connection string handed to pyodbc.connect() in main(). It is read from
# the HELP_DB_CONN environment variable; when that variable is unset the
# literal below is used instead.
# SECURITY NOTE(review): the fallback literal embeds a server address, a login
# and a password directly in source control. Consider dropping the fallback
# (fail fast when HELP_DB_CONN is missing) and rotating the exposed password.
# NOTE(review): TrustServerCertificate=yes presumably disables validation of
# the server's TLS certificate — confirm against the ODBC driver documentation.
CONN_STR = os.getenv(
"HELP_DB_CONN",
"DRIVER={ODBC Driver 18 for SQL Server};"
"TrustServerCertificate=yes;"
"SERVER=94.26.63.238,13151;DATABASE=blondina;"
"UID=blondina_login;PWD=blondina_parola_123"
)
# JSON file with pending keyword edits, expected next to this script.
CHANGES_FILE = Path(__file__).parent / "keywords_changes.json"
def _text_field(item, key):
    """Return the stripped string stored under *key* in *item*.

    A missing key or a JSON ``null`` yields ``""``. Raises ``TypeError`` when
    *item* is not a dict or the stored value is not a string, so the caller
    can report the entry as an error instead of aborting the whole run.
    """
    if not isinstance(item, dict):
        raise TypeError(f"очаква се обект, получено: {type(item).__name__}")
    value = item.get(key)
    if value is None:
        return ""
    if not isinstance(value, str):
        raise TypeError(
            f"полето '{key}' трябва да е текст, получено: {type(value).__name__}"
        )
    return value.strip()


def main():
    """Apply the keyword edits stored in CHANGES_FILE to RIP_help_sections.

    Steps:
      1. Load CHANGES_FILE (a JSON list of objects with "code" and "keywords").
      2. For every entry with a non-empty code, run a parameterized UPDATE
         that sets ``keywords`` and ``updated_at`` on the matching row.
      3. Commit once after the loop, then rename CHANGES_FILE to a
         timestamped archive name in the same directory.

    Returns early (without touching the database) when the file is missing
    or holds no changes. Exits via ``sys.exit`` when the JSON top-level value
    is not a list. Per-entry failures are printed and counted; they do not
    stop the remaining entries from being processed.
    """
    if not CHANGES_FILE.exists():
        print("Файлът keywords_changes.json не е намерен.")
        print("Запази промените от браузъра първо.")
        return

    changes = json.loads(CHANGES_FILE.read_text(encoding="utf-8"))
    if not changes:
        print("Няма промени за запис.")
        return
    if not isinstance(changes, list):
        # Iterating a dict/str here would only fail later with an obscure
        # AttributeError, so reject the wrong shape up front.
        sys.exit("keywords_changes.json трябва да съдържа списък от промени.")

    print(f"Записвам {len(changes)} промени в БД...")
    ok, err = 0, 0
    conn = pyodbc.connect(CONN_STR, autocommit=False)
    try:
        cur = conn.cursor()
        for item in changes:
            # Validate the entry first: a malformed entry is counted as an
            # error rather than crashing the run and losing earlier updates.
            try:
                code = _text_field(item, "code")
                keywords = _text_field(item, "keywords")
            except TypeError as exc:
                print(f" ! {item!r} — {exc}")
                err += 1
                continue
            if not code:
                continue
            try:
                cur.execute(
                    "UPDATE RIP_help_sections SET keywords=?, updated_at=GETDATE() WHERE code=?",
                    keywords, code
                )
            except Exception as exc:  # best effort: keep processing other rows
                print(f" ! {code} — {exc}")
                err += 1
                continue
            if cur.rowcount > 0:
                ok += 1
                print(f"{code}")
            else:
                print(f" ? {code} — не е намерен в БД")
        conn.commit()
    finally:
        # Always release the connection. commit() is only reached when the
        # loop completes, so an unexpected failure never commits a partial
        # batch (per the pyodbc docs, close() discards uncommitted work).
        conn.close()

    print(f"\nГотово: {ok} записани, {err} грешки.")

    # Archive the processed file under a timestamped name.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive = CHANGES_FILE.parent / f"keywords_changes_{ts}.json"
    CHANGES_FILE.rename(archive)
    print(f"Файлът е архивиран като: {archive.name}")


if __name__ == "__main__":
    main()