Migrate to PostgreSQL + add FastAPI webapp for Coolify deploy

Backend migration:
- Replace pyodbc/SQL Server with psycopg2/PostgreSQL throughout
- Rewrite Database class with PostgreSQL-native SQL: SERIAL, ON CONFLICT, NOW()
- Lowercase table names (rip_help_files, rip_help_sections) - Postgres convention
- libpq connection string format in HELP_DB_CONN

Webapp (webapp/):
- FastAPI app: GET /, GET /images/<f>, GET /home-image, GET /api/sections,
  POST /api/keywords/<code>, GET /healthz
- Jinja2 template extracted from generate_html.py with HTTP image URLs
- Direct keyword save to DB (no JSON download detour)
- Same prefix scoping as CLI tools (?prefix=RIP)

Deployment:
- Dockerfile (python:3.12-slim + uvicorn)
- docker-compose.yml for local dev
- requirements-webapp.txt (minimal, no Windows-only deps)
- .dockerignore excludes pipeline scripts and BAT files
- README updated with webapp section and Coolify deploy guide

Also: switch AI model to claude-haiku-4-5 (~3x cheaper, same quality for this task)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 17:00:44 +03:00
parent 711053b8bd
commit 9613420d1d
13 changed files with 1034 additions and 167 deletions

View File

@@ -33,7 +33,7 @@ from datetime import datetime
from dataclasses import dataclass, field
from typing import Optional
import pyodbc
import psycopg2
import anthropic
from docx import Document
from bs4 import BeautifulSoup
@@ -73,7 +73,7 @@ log = logging.getLogger(__name__)
MIN_SECTION_TOKENS = 60 # sections below this threshold are merged into the previous one
MAX_AI_CHARS = 4000 # maximum amount of text sent to Claude for classification
# NOTE(review): AI_MODEL is assigned twice in this span. Read top to bottom, the
# later value ("claude-haiku-4-5") is the one that takes effect; presumably the
# first assignment is the pre-change line and should not remain - confirm.
AI_MODEL = "claude-sonnet-4-6"
AI_MODEL = "claude-haiku-4-5"
MIN_IMAGE_PX = 50 # images smaller than NxN px are skipped (icons/bullets)
@@ -157,116 +157,52 @@ class ProcessedSection:
# Database
# ──────────────────────────────────────────────
def _ensure_trust_server_certificate(conn_str: str) -> str:
"""Append TrustServerCertificate=yes to the connection string if it is missing."""
# An empty/falsy connection string is returned unchanged.
if not conn_str:
return conn_str
# The option is already present (matched case-insensitively, optional
# whitespace before '='): return the string as it is.
if re.search(r"TrustServerCertificate\s*=", conn_str, re.IGNORECASE):
return conn_str
# Insert a ';' separator only when the string does not already end with one
# (trailing whitespace is ignored for this check, but not removed).
sep = "" if conn_str.rstrip().endswith(";") else ";"
return f"{conn_str}{sep}TrustServerCertificate=yes;"
class Database:
"""PostgreSQL backend (psycopg2). The connection string is in libpq format:
'host=... port=... dbname=... user=... password=...'
"""
def __init__(self, conn_str: str):
# Stores the connection string, opens the connection and then runs
# _ensure_schema() so the tables exist before any other method is used.
# NOTE(review): this span contains two consecutive pairs of assignments to
# self.conn_str / self.conn. The first pair is the pyodbc form (connection
# string passed through _ensure_trust_server_certificate, autocommit=False);
# the second pair is the psycopg2 form (connection string used unchanged).
# Read top to bottom, the second pair overwrites the first. Presumably only
# the psycopg2 pair is meant to remain - confirm against the actual source file.
self.conn_str = _ensure_trust_server_certificate(conn_str)
self.conn = pyodbc.connect(self.conn_str, autocommit=False)
self.conn_str = conn_str
self.conn = psycopg2.connect(conn_str)
self._ensure_schema()
def _ensure_schema(self):
"""Create the tables if they do not exist."""
"""Create the tables if they do not exist (Postgres syntax)."""
# NOTE(review): the statements below mix two SQL dialects inside the same
# string literals. The T-SQL lines (sys.tables / sys.columns / sys.indexes
# checks, NVARCHAR, DATETIME2, GETDATE(), IDENTITY) and the PostgreSQL lines
# (CREATE TABLE IF NOT EXISTS, SERIAL, TIMESTAMP, NOW(), CREATE INDEX IF NOT
# EXISTS) are interleaved, so several execute() calls are not valid in either
# dialect as written. Presumably only the PostgreSQL statements for
# rip_help_files, rip_help_sections and the two indexes are meant to remain -
# confirm against the actual source file.
cur = self.conn.cursor()
cur.execute("""
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name='RIP_help_files')
CREATE TABLE RIP_help_files (
id INT IDENTITY PRIMARY KEY,
prefix NVARCHAR(50) NOT NULL DEFAULT 'HLP',
file_path NVARCHAR(1000) NOT NULL,
CREATE TABLE IF NOT EXISTS rip_help_files (
id SERIAL PRIMARY KEY,
prefix VARCHAR(50) NOT NULL DEFAULT 'HLP',
file_path VARCHAR(1000) NOT NULL,
file_hash CHAR(64) NOT NULL,
processed_at DATETIME2 NOT NULL DEFAULT GETDATE(),
section_count INT NOT NULL DEFAULT 0,
CONSTRAINT UQ_RIP_help_files_prefix_path UNIQUE (prefix, file_path)
)""")
# Migrate: add the prefix column if the table comes from an older version
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.columns
WHERE object_id=OBJECT_ID('RIP_help_files') AND name='prefix'
processed_at TIMESTAMP NOT NULL DEFAULT NOW(),
section_count INTEGER NOT NULL DEFAULT 0,
UNIQUE (prefix, file_path)
)
BEGIN
ALTER TABLE RIP_help_files ADD prefix NVARCHAR(50) NOT NULL
CONSTRAINT DF_RIP_help_files_prefix DEFAULT 'HLP' WITH VALUES;
END
""")
# Migrate: if an old UNIQUE on file_path alone (without prefix) exists, drop it
cur.execute("""
DECLARE @c NVARCHAR(200);
SELECT @c = i.name FROM sys.indexes i
WHERE i.object_id=OBJECT_ID('RIP_help_files')
AND i.is_unique=1
AND i.name <> 'UQ_RIP_help_files_prefix_path'
AND i.name NOT LIKE 'PK_%'
AND (SELECT COUNT(*) FROM sys.index_columns ic
WHERE ic.object_id=i.object_id AND ic.index_id=i.index_id) = 1;
IF @c IS NOT NULL EXEC('ALTER TABLE RIP_help_files DROP CONSTRAINT [' + @c + ']');
""")
# Migrate: create the new composite UNIQUE if it is missing
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.indexes
WHERE name='UQ_RIP_help_files_prefix_path'
AND object_id=OBJECT_ID('RIP_help_files')
)
ALTER TABLE RIP_help_files
ADD CONSTRAINT UQ_RIP_help_files_prefix_path UNIQUE (prefix, file_path)
""")
cur.execute("""
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name='RIP_help_sections')
CREATE TABLE RIP_help_sections (
id INT IDENTITY PRIMARY KEY,
prefix NVARCHAR(50) NOT NULL DEFAULT 'HLP',
code NVARCHAR(80) NOT NULL UNIQUE,
source_file NVARCHAR(1000) NOT NULL,
title NVARCHAR(500),
keywords NVARCHAR(300),
char_count INT,
output_path NVARCHAR(1000),
images NVARCHAR(MAX),
created_at DATETIME2 NOT NULL DEFAULT GETDATE(),
updated_at DATETIME2 NOT NULL DEFAULT GETDATE()
)""")
# Migrate: add the prefix column if the table comes from an older version
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.columns
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='prefix'
CREATE TABLE IF NOT EXISTS rip_help_sections (
id SERIAL PRIMARY KEY,
prefix VARCHAR(50) NOT NULL DEFAULT 'HLP',
code VARCHAR(80) NOT NULL UNIQUE,
source_file VARCHAR(1000) NOT NULL,
title VARCHAR(500),
keywords VARCHAR(300),
char_count INTEGER,
output_path VARCHAR(1000),
images TEXT,
html_text TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
)
ALTER TABLE RIP_help_sections ADD prefix NVARCHAR(50) NOT NULL
CONSTRAINT DF_RIP_help_sections_prefix DEFAULT 'HLP' WITH VALUES
""")
# Migrate: add the 'images' column if the table was created by an older version
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.columns
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='images'
)
ALTER TABLE RIP_help_sections ADD images NVARCHAR(MAX) NULL
CREATE INDEX IF NOT EXISTS ix_rip_help_sections_keywords
ON rip_help_sections(keywords)
""")
# Migrate: add the 'html_text' column (rich HTML with formatting)
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.columns
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='html_text'
)
ALTER TABLE RIP_help_sections ADD html_text NVARCHAR(MAX) NULL
""")
# Indexes for searching by keywords and title
# NOTE(review): the index statements visible in this method cover the keywords
# and prefix columns; no index on title is visible here.
cur.execute("""
IF NOT EXISTS (
SELECT 1 FROM sys.indexes
WHERE name='IX_RIP_help_sections_keywords' AND object_id=OBJECT_ID('RIP_help_sections')
)
CREATE INDEX IX_RIP_help_sections_keywords ON RIP_help_sections(keywords)
CREATE INDEX IF NOT EXISTS ix_rip_help_sections_prefix
ON rip_help_sections(prefix)
""")
# Commit the schema statements executed above on this connection.
self.conn.commit()
# The log message below is Bulgarian for "The schema has been checked / created."
log.info("Схемата е проверена / създадена.")
@@ -274,8 +210,8 @@ class Database:
def get_file_hash(self, prefix: str, file_path: str) -> Optional[str]:
# Looks up the stored file_hash for the (prefix, file_path) pair and returns
# it, or None when the query yields no row.
# NOTE(review): the execute() call below lists two query/parameter forms back
# to back: first the '?' placeholders with positional arguments, then the
# '%s' placeholders with a parameter tuple against the lowercase table name.
# Presumably only the second form is meant to remain - confirm against the
# actual source file.
cur = self.conn.cursor()
cur.execute(
"SELECT file_hash FROM RIP_help_files WHERE prefix=? AND file_path=?",
prefix, file_path
"SELECT file_hash FROM rip_help_files WHERE prefix=%s AND file_path=%s",
(prefix, file_path)
)
row = cur.fetchone()
return row[0] if row else None
@@ -283,23 +219,20 @@ class Database:
def upsert_file(self, prefix: str, file_path: str, file_hash: str, section_count: int):
# Inserts or updates the row for (prefix, file_path) with the given file_hash
# and section_count, refreshes processed_at, and commits.
# NOTE(review): two statements are listed back to back below: a MERGE with '?'
# placeholders and positional arguments, then an INSERT ... ON CONFLICT
# (prefix, file_path) DO UPDATE with '%s' placeholders and a parameter tuple.
# The first string literal is closed before the second statement's text
# begins, so the span is not valid Python as written. Presumably only the
# INSERT ... ON CONFLICT form is meant to remain - confirm against the actual
# source file.
cur = self.conn.cursor()
cur.execute("""
MERGE RIP_help_files AS t
USING (SELECT ? AS prefix, ? AS file_path, ? AS file_hash, ? AS section_count) AS s
ON t.prefix = s.prefix AND t.file_path = s.file_path
WHEN MATCHED THEN
UPDATE SET file_hash=s.file_hash, section_count=s.section_count,
processed_at=GETDATE()
WHEN NOT MATCHED THEN
INSERT (prefix, file_path, file_hash, section_count)
VALUES (s.prefix, s.file_path, s.file_hash, s.section_count);
""", prefix, file_path, file_hash, section_count)
INSERT INTO rip_help_files (prefix, file_path, file_hash, section_count)
VALUES (%s, %s, %s, %s)
ON CONFLICT (prefix, file_path) DO UPDATE SET
file_hash = EXCLUDED.file_hash,
section_count= EXCLUDED.section_count,
processed_at = NOW()
""", (prefix, file_path, file_hash, section_count))
self.conn.commit()
def delete_sections_for_file(self, prefix: str, file_path: str):
# Deletes every section row whose prefix and source_file match the arguments,
# then commits. Nothing is returned.
# NOTE(review): the execute() call below lists two query/parameter forms back
# to back: first '?' placeholders with positional arguments, then '%s'
# placeholders with a parameter tuple against the lowercase table name.
# Presumably only the second form is meant to remain - confirm against the
# actual source file.
cur = self.conn.cursor()
cur.execute(
"DELETE FROM RIP_help_sections WHERE prefix=? AND source_file=?",
prefix, file_path
"DELETE FROM rip_help_sections WHERE prefix=%s AND source_file=%s",
(prefix, file_path)
)
self.conn.commit()
@@ -307,21 +240,20 @@ class Database:
"""Връща всички source_file пътища за даден префикс."""
cur = self.conn.cursor()
cur.execute("""
SELECT file_path FROM RIP_help_files WHERE prefix=?
SELECT file_path FROM rip_help_files WHERE prefix=%s
UNION
SELECT source_file FROM RIP_help_sections WHERE prefix=?
""", prefix, prefix)
SELECT source_file FROM rip_help_sections WHERE prefix=%s
""", (prefix, prefix))
return [r[0] for r in cur.fetchall()]
def section_output_paths_for(self, prefix: str, source_files: list[str]) -> list[str]:
# Returns the non-empty output_path values of the sections that belong to the
# given prefix and to any of the given source files. An empty source_files
# list returns [] without running a query.
# NOTE(review): two query forms are listed back to back below: an IN (...)
# list built from one '?' per source file with the files passed as positional
# arguments, then '= ANY(%s)' with the files passed as a single list inside
# the parameter tuple. Presumably only the second form is meant to remain (in
# which case 'placeholders' would be unused) - confirm against the actual
# source file.
if not source_files:
return []
cur = self.conn.cursor()
placeholders = ",".join("?" for _ in source_files)
cur.execute(
f"SELECT output_path FROM RIP_help_sections "
f"WHERE prefix=? AND source_file IN ({placeholders})",
prefix, *source_files
"SELECT output_path FROM rip_help_sections "
"WHERE prefix=%s AND source_file = ANY(%s)",
(prefix, list(source_files))
)
return [r[0] for r in cur.fetchall() if r[0]]
@@ -329,17 +261,16 @@ class Database:
if not source_files:
return 0
cur = self.conn.cursor()
placeholders = ",".join("?" for _ in source_files)
cur.execute(
f"DELETE FROM RIP_help_sections "
f"WHERE prefix=? AND source_file IN ({placeholders})",
prefix, *source_files
"DELETE FROM rip_help_sections "
"WHERE prefix=%s AND source_file = ANY(%s)",
(prefix, list(source_files))
)
sec_deleted = cur.rowcount
cur.execute(
f"DELETE FROM RIP_help_files "
f"WHERE prefix=? AND file_path IN ({placeholders})",
prefix, *source_files
"DELETE FROM rip_help_files "
"WHERE prefix=%s AND file_path = ANY(%s)",
(prefix, list(source_files))
)
self.conn.commit()
return sec_deleted
@@ -347,22 +278,22 @@ class Database:
def insert_section(self, prefix: str, ps: ProcessedSection, output_path: str):
# Inserts the section described by ps, or updates the existing row that has
# the same code, then commits. The values written are prefix, ps.code,
# ps.source_file, ps.title, ps.keywords, ps.char_count, output_path,
# ps.images_json (into the images column) and ps.html_text; updated_at is
# refreshed on update.
# NOTE(review): two statements are listed back to back below: a MERGE keyed on
# code with '?' placeholders (which needs the values passed twice, once for
# UPDATE and once for INSERT), then an INSERT ... ON CONFLICT (code) DO UPDATE
# with '%s' placeholders and one parameter tuple. The first string literal is
# closed before the second statement's text begins, so the span is not valid
# Python as written. Presumably only the INSERT ... ON CONFLICT form is meant
# to remain - confirm against the actual source file.
cur = self.conn.cursor()
cur.execute("""
MERGE RIP_help_sections AS t
USING (SELECT ? AS code) AS s ON t.code = s.code
WHEN MATCHED THEN
UPDATE SET prefix=?, source_file=?, title=?, keywords=?,
char_count=?, output_path=?, images=?, html_text=?,
updated_at=GETDATE()
WHEN NOT MATCHED THEN
INSERT (prefix, code, source_file, title, keywords, char_count, output_path,
images, html_text)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
ps.code, # USING
prefix, ps.source_file, ps.title, ps.keywords, # UPDATE SET
ps.char_count, output_path, ps.images_json, ps.html_text,
prefix, ps.code, ps.source_file, ps.title, ps.keywords, # INSERT
ps.char_count, output_path, ps.images_json, ps.html_text)
INSERT INTO rip_help_sections
(prefix, code, source_file, title, keywords,
char_count, output_path, images, html_text)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (code) DO UPDATE SET
prefix = EXCLUDED.prefix,
source_file = EXCLUDED.source_file,
title = EXCLUDED.title,
keywords = EXCLUDED.keywords,
char_count = EXCLUDED.char_count,
output_path = EXCLUDED.output_path,
images = EXCLUDED.images,
html_text = EXCLUDED.html_text,
updated_at = NOW()
""", (prefix, ps.code, ps.source_file, ps.title, ps.keywords,
ps.char_count, output_path, ps.images_json, ps.html_text))
self.conn.commit()
def close(self):