Migrate to PostgreSQL + add FastAPI webapp for Coolify deploy
Backend migration:
- Replace pyodbc/SQL Server with psycopg2/PostgreSQL throughout
- Rewrite Database class with portable SQL: SERIAL, ON CONFLICT, NOW()
- Lowercase table names (rip_help_files, rip_help_sections) - Postgres convention
- libpq connection string format in HELP_DB_CONN

Webapp (webapp/):
- FastAPI app: GET /, GET /images/<f>, GET /home-image, GET /api/sections, POST /api/keywords/<code>, GET /healthz
- Jinja2 template extracted from generate_html.py with HTTP image URLs
- Direct keyword save to DB (no JSON download detour)
- Same prefix scoping as CLI tools (?prefix=RIP)

Deployment:
- Dockerfile (python:3.12-slim + uvicorn)
- docker-compose.yml for local dev
- requirements-webapp.txt (minimal, no Windows-only deps)
- .dockerignore excludes pipeline scripts and BAT files
- README updated with webapp section and Coolify deploy guide

Also: switch AI model to claude-haiku-4-5 (~3x cheaper, same quality for this task)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -33,7 +33,7 @@ from datetime import datetime
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import pyodbc
|
||||
import psycopg2
|
||||
import anthropic
|
||||
from docx import Document
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -73,7 +73,7 @@ log = logging.getLogger(__name__)
|
||||
|
||||
MIN_SECTION_TOKENS = 60 # секции под тази граница се сливат с предишната
|
||||
MAX_AI_CHARS = 4000 # максимален текст, изпращан към Claude за класификация
|
||||
AI_MODEL = "claude-sonnet-4-6"
|
||||
AI_MODEL = "claude-haiku-4-5"
|
||||
MIN_IMAGE_PX = 50 # картинки под NxN px се пропускат (иконки/булети)
|
||||
|
||||
|
||||
@@ -157,116 +157,52 @@ class ProcessedSection:
|
||||
# База данни
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
def _ensure_trust_server_certificate(conn_str: str) -> str:
|
||||
"""Добавя TrustServerCertificate=yes към connection string ако липсва."""
|
||||
if not conn_str:
|
||||
return conn_str
|
||||
if re.search(r"TrustServerCertificate\s*=", conn_str, re.IGNORECASE):
|
||||
return conn_str
|
||||
sep = "" if conn_str.rstrip().endswith(";") else ";"
|
||||
return f"{conn_str}{sep}TrustServerCertificate=yes;"
|
||||
|
||||
|
||||
class Database:
|
||||
"""PostgreSQL backend (psycopg2). Connection string е libpq формат:
|
||||
'host=... port=... dbname=... user=... password=...'
|
||||
"""
|
||||
def __init__(self, conn_str: str):
|
||||
self.conn_str = _ensure_trust_server_certificate(conn_str)
|
||||
self.conn = pyodbc.connect(self.conn_str, autocommit=False)
|
||||
self.conn_str = conn_str
|
||||
self.conn = psycopg2.connect(conn_str)
|
||||
self._ensure_schema()
|
||||
|
||||
def _ensure_schema(self):
|
||||
"""Създава таблиците ако не съществуват."""
|
||||
"""Създава таблиците ако не съществуват (Postgres syntax)."""
|
||||
cur = self.conn.cursor()
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name='RIP_help_files')
|
||||
CREATE TABLE RIP_help_files (
|
||||
id INT IDENTITY PRIMARY KEY,
|
||||
prefix NVARCHAR(50) NOT NULL DEFAULT 'HLP',
|
||||
file_path NVARCHAR(1000) NOT NULL,
|
||||
CREATE TABLE IF NOT EXISTS rip_help_files (
|
||||
id SERIAL PRIMARY KEY,
|
||||
prefix VARCHAR(50) NOT NULL DEFAULT 'HLP',
|
||||
file_path VARCHAR(1000) NOT NULL,
|
||||
file_hash CHAR(64) NOT NULL,
|
||||
processed_at DATETIME2 NOT NULL DEFAULT GETDATE(),
|
||||
section_count INT NOT NULL DEFAULT 0,
|
||||
CONSTRAINT UQ_RIP_help_files_prefix_path UNIQUE (prefix, file_path)
|
||||
)""")
|
||||
# Migrate: добавяме колонка prefix ако таблицата е по-стара версия
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.columns
|
||||
WHERE object_id=OBJECT_ID('RIP_help_files') AND name='prefix'
|
||||
processed_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
section_count INTEGER NOT NULL DEFAULT 0,
|
||||
UNIQUE (prefix, file_path)
|
||||
)
|
||||
BEGIN
|
||||
ALTER TABLE RIP_help_files ADD prefix NVARCHAR(50) NOT NULL
|
||||
CONSTRAINT DF_RIP_help_files_prefix DEFAULT 'HLP' WITH VALUES;
|
||||
END
|
||||
""")
|
||||
# Migrate: ако има стара UNIQUE на file_path сама (без prefix), сваляме я
|
||||
cur.execute("""
|
||||
DECLARE @c NVARCHAR(200);
|
||||
SELECT @c = i.name FROM sys.indexes i
|
||||
WHERE i.object_id=OBJECT_ID('RIP_help_files')
|
||||
AND i.is_unique=1
|
||||
AND i.name <> 'UQ_RIP_help_files_prefix_path'
|
||||
AND i.name NOT LIKE 'PK_%'
|
||||
AND (SELECT COUNT(*) FROM sys.index_columns ic
|
||||
WHERE ic.object_id=i.object_id AND ic.index_id=i.index_id) = 1;
|
||||
IF @c IS NOT NULL EXEC('ALTER TABLE RIP_help_files DROP CONSTRAINT [' + @c + ']');
|
||||
""")
|
||||
# Migrate: създаваме новата composite UNIQUE ако липсва
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.indexes
|
||||
WHERE name='UQ_RIP_help_files_prefix_path'
|
||||
AND object_id=OBJECT_ID('RIP_help_files')
|
||||
)
|
||||
ALTER TABLE RIP_help_files
|
||||
ADD CONSTRAINT UQ_RIP_help_files_prefix_path UNIQUE (prefix, file_path)
|
||||
""")
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name='RIP_help_sections')
|
||||
CREATE TABLE RIP_help_sections (
|
||||
id INT IDENTITY PRIMARY KEY,
|
||||
prefix NVARCHAR(50) NOT NULL DEFAULT 'HLP',
|
||||
code NVARCHAR(80) NOT NULL UNIQUE,
|
||||
source_file NVARCHAR(1000) NOT NULL,
|
||||
title NVARCHAR(500),
|
||||
keywords NVARCHAR(300),
|
||||
char_count INT,
|
||||
output_path NVARCHAR(1000),
|
||||
images NVARCHAR(MAX),
|
||||
created_at DATETIME2 NOT NULL DEFAULT GETDATE(),
|
||||
updated_at DATETIME2 NOT NULL DEFAULT GETDATE()
|
||||
)""")
|
||||
# Migrate: добавяме колонка prefix ако таблицата е по-стара версия
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.columns
|
||||
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='prefix'
|
||||
CREATE TABLE IF NOT EXISTS rip_help_sections (
|
||||
id SERIAL PRIMARY KEY,
|
||||
prefix VARCHAR(50) NOT NULL DEFAULT 'HLP',
|
||||
code VARCHAR(80) NOT NULL UNIQUE,
|
||||
source_file VARCHAR(1000) NOT NULL,
|
||||
title VARCHAR(500),
|
||||
keywords VARCHAR(300),
|
||||
char_count INTEGER,
|
||||
output_path VARCHAR(1000),
|
||||
images TEXT,
|
||||
html_text TEXT,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||||
)
|
||||
ALTER TABLE RIP_help_sections ADD prefix NVARCHAR(50) NOT NULL
|
||||
CONSTRAINT DF_RIP_help_sections_prefix DEFAULT 'HLP' WITH VALUES
|
||||
""")
|
||||
# Migrate: добавяме колонка 'images' ако таблицата е създадена по-стара версия
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.columns
|
||||
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='images'
|
||||
)
|
||||
ALTER TABLE RIP_help_sections ADD images NVARCHAR(MAX) NULL
|
||||
CREATE INDEX IF NOT EXISTS ix_rip_help_sections_keywords
|
||||
ON rip_help_sections(keywords)
|
||||
""")
|
||||
# Migrate: добавяме колонка 'html_text' (rich HTML с форматиране)
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.columns
|
||||
WHERE object_id=OBJECT_ID('RIP_help_sections') AND name='html_text'
|
||||
)
|
||||
ALTER TABLE RIP_help_sections ADD html_text NVARCHAR(MAX) NULL
|
||||
""")
|
||||
# Индекси за търсене по ключови думи и заглавие
|
||||
cur.execute("""
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM sys.indexes
|
||||
WHERE name='IX_RIP_help_sections_keywords' AND object_id=OBJECT_ID('RIP_help_sections')
|
||||
)
|
||||
CREATE INDEX IX_RIP_help_sections_keywords ON RIP_help_sections(keywords)
|
||||
CREATE INDEX IF NOT EXISTS ix_rip_help_sections_prefix
|
||||
ON rip_help_sections(prefix)
|
||||
""")
|
||||
self.conn.commit()
|
||||
log.info("Схемата е проверена / създадена.")
|
||||
@@ -274,8 +210,8 @@ class Database:
|
||||
def get_file_hash(self, prefix: str, file_path: str) -> Optional[str]:
|
||||
cur = self.conn.cursor()
|
||||
cur.execute(
|
||||
"SELECT file_hash FROM RIP_help_files WHERE prefix=? AND file_path=?",
|
||||
prefix, file_path
|
||||
"SELECT file_hash FROM rip_help_files WHERE prefix=%s AND file_path=%s",
|
||||
(prefix, file_path)
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row[0] if row else None
|
||||
@@ -283,23 +219,20 @@ class Database:
|
||||
def upsert_file(self, prefix: str, file_path: str, file_hash: str, section_count: int):
|
||||
cur = self.conn.cursor()
|
||||
cur.execute("""
|
||||
MERGE RIP_help_files AS t
|
||||
USING (SELECT ? AS prefix, ? AS file_path, ? AS file_hash, ? AS section_count) AS s
|
||||
ON t.prefix = s.prefix AND t.file_path = s.file_path
|
||||
WHEN MATCHED THEN
|
||||
UPDATE SET file_hash=s.file_hash, section_count=s.section_count,
|
||||
processed_at=GETDATE()
|
||||
WHEN NOT MATCHED THEN
|
||||
INSERT (prefix, file_path, file_hash, section_count)
|
||||
VALUES (s.prefix, s.file_path, s.file_hash, s.section_count);
|
||||
""", prefix, file_path, file_hash, section_count)
|
||||
INSERT INTO rip_help_files (prefix, file_path, file_hash, section_count)
|
||||
VALUES (%s, %s, %s, %s)
|
||||
ON CONFLICT (prefix, file_path) DO UPDATE SET
|
||||
file_hash = EXCLUDED.file_hash,
|
||||
section_count= EXCLUDED.section_count,
|
||||
processed_at = NOW()
|
||||
""", (prefix, file_path, file_hash, section_count))
|
||||
self.conn.commit()
|
||||
|
||||
def delete_sections_for_file(self, prefix: str, file_path: str):
|
||||
cur = self.conn.cursor()
|
||||
cur.execute(
|
||||
"DELETE FROM RIP_help_sections WHERE prefix=? AND source_file=?",
|
||||
prefix, file_path
|
||||
"DELETE FROM rip_help_sections WHERE prefix=%s AND source_file=%s",
|
||||
(prefix, file_path)
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
@@ -307,21 +240,20 @@ class Database:
|
||||
"""Връща всички source_file пътища за даден префикс."""
|
||||
cur = self.conn.cursor()
|
||||
cur.execute("""
|
||||
SELECT file_path FROM RIP_help_files WHERE prefix=?
|
||||
SELECT file_path FROM rip_help_files WHERE prefix=%s
|
||||
UNION
|
||||
SELECT source_file FROM RIP_help_sections WHERE prefix=?
|
||||
""", prefix, prefix)
|
||||
SELECT source_file FROM rip_help_sections WHERE prefix=%s
|
||||
""", (prefix, prefix))
|
||||
return [r[0] for r in cur.fetchall()]
|
||||
|
||||
def section_output_paths_for(self, prefix: str, source_files: list[str]) -> list[str]:
|
||||
if not source_files:
|
||||
return []
|
||||
cur = self.conn.cursor()
|
||||
placeholders = ",".join("?" for _ in source_files)
|
||||
cur.execute(
|
||||
f"SELECT output_path FROM RIP_help_sections "
|
||||
f"WHERE prefix=? AND source_file IN ({placeholders})",
|
||||
prefix, *source_files
|
||||
"SELECT output_path FROM rip_help_sections "
|
||||
"WHERE prefix=%s AND source_file = ANY(%s)",
|
||||
(prefix, list(source_files))
|
||||
)
|
||||
return [r[0] for r in cur.fetchall() if r[0]]
|
||||
|
||||
@@ -329,17 +261,16 @@ class Database:
|
||||
if not source_files:
|
||||
return 0
|
||||
cur = self.conn.cursor()
|
||||
placeholders = ",".join("?" for _ in source_files)
|
||||
cur.execute(
|
||||
f"DELETE FROM RIP_help_sections "
|
||||
f"WHERE prefix=? AND source_file IN ({placeholders})",
|
||||
prefix, *source_files
|
||||
"DELETE FROM rip_help_sections "
|
||||
"WHERE prefix=%s AND source_file = ANY(%s)",
|
||||
(prefix, list(source_files))
|
||||
)
|
||||
sec_deleted = cur.rowcount
|
||||
cur.execute(
|
||||
f"DELETE FROM RIP_help_files "
|
||||
f"WHERE prefix=? AND file_path IN ({placeholders})",
|
||||
prefix, *source_files
|
||||
"DELETE FROM rip_help_files "
|
||||
"WHERE prefix=%s AND file_path = ANY(%s)",
|
||||
(prefix, list(source_files))
|
||||
)
|
||||
self.conn.commit()
|
||||
return sec_deleted
|
||||
@@ -347,22 +278,22 @@ class Database:
|
||||
def insert_section(self, prefix: str, ps: ProcessedSection, output_path: str):
|
||||
cur = self.conn.cursor()
|
||||
cur.execute("""
|
||||
MERGE RIP_help_sections AS t
|
||||
USING (SELECT ? AS code) AS s ON t.code = s.code
|
||||
WHEN MATCHED THEN
|
||||
UPDATE SET prefix=?, source_file=?, title=?, keywords=?,
|
||||
char_count=?, output_path=?, images=?, html_text=?,
|
||||
updated_at=GETDATE()
|
||||
WHEN NOT MATCHED THEN
|
||||
INSERT (prefix, code, source_file, title, keywords, char_count, output_path,
|
||||
images, html_text)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
|
||||
""",
|
||||
ps.code, # USING
|
||||
prefix, ps.source_file, ps.title, ps.keywords, # UPDATE SET
|
||||
ps.char_count, output_path, ps.images_json, ps.html_text,
|
||||
prefix, ps.code, ps.source_file, ps.title, ps.keywords, # INSERT
|
||||
ps.char_count, output_path, ps.images_json, ps.html_text)
|
||||
INSERT INTO rip_help_sections
|
||||
(prefix, code, source_file, title, keywords,
|
||||
char_count, output_path, images, html_text)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (code) DO UPDATE SET
|
||||
prefix = EXCLUDED.prefix,
|
||||
source_file = EXCLUDED.source_file,
|
||||
title = EXCLUDED.title,
|
||||
keywords = EXCLUDED.keywords,
|
||||
char_count = EXCLUDED.char_count,
|
||||
output_path = EXCLUDED.output_path,
|
||||
images = EXCLUDED.images,
|
||||
html_text = EXCLUDED.html_text,
|
||||
updated_at = NOW()
|
||||
""", (prefix, ps.code, ps.source_file, ps.title, ps.keywords,
|
||||
ps.char_count, output_path, ps.images_json, ps.html_text))
|
||||
self.conn.commit()
|
||||
|
||||
def close(self):
|
||||
|
||||
Reference in New Issue
Block a user