diff --git a/AGENTS.md b/AGENTS.md index 945ca47c..a040405f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,8 @@ Ce référentiel utilise des agents/outils pour automatiser des modifications de ## Règle essentielle - Toujours écrire des tests pertinents (pytest) pour couvrir le correctif ou la fonctionnalité ajoutée. -- Toujours exécuter la suite de tests localement avec `pytest -q` et s'assurer qu'elle passe avant de conclure la tâche. +- Toujours exécuter la suite de tests localement avec `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 pytest -q` pour accélérer l'exécution et s'assurer qu'elle passe avant de conclure la tâche. +- Inclure et valider un jeton CSRF pour toute requête POST/PUT/DELETE modifiant l'état (champ `csrf_token` ou en-tête `X-CSRFToken`). ## Détails pratiques - Emplacement des tests: placez-les sous `tests/` avec le préfixe `test_*.py`. @@ -15,7 +16,9 @@ Ce référentiel utilise des agents/outils pour automatiser des modifications de - Si des avertissements perturbent la lisibilité, nettoyez-les ou filtrez-les de manière ciblée, sans masquer des problèmes réels. ## Commandes utiles -- Lancer toute la suite: `pytest -q` +- Lancer toute la suite (résumé concis): `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 pytest -q` +- Pour obtenir explicitement le résumé final (nombre de tests et durée), exécutez `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 pytest` sans `-q` supplémentaire (le fichier `pytest.ini` l'inclut déjà) et relevez la dernière ligne de sortie. 
+- Interrompre au premier échec pour un diagnostic rapide: `PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 pytest -q --maxfail=1` - Exécuter un seul fichier: `pytest -q tests/test_mon_module.py` - Exécuter un seul test: `pytest -q tests/test_mon_module.py::test_cas_specifique` - Compter les tests rapidement (collect only): `pytest -q --collect-only | awk -F': ' '{s+=$2} END{print s}'` diff --git a/OPENAI_USAGE.md b/OPENAI_USAGE.md index f3b14b0b..dbde63d4 100644 --- a/OPENAI_USAGE.md +++ b/OPENAI_USAGE.md @@ -26,6 +26,7 @@ Note: L’ancien endpoint synchrone `POST /gestion_programme/update_verifier_pla - Grille d’évaluation: `src/app/tasks/generation_grille.py` - Logigramme de compétences: `src/app/tasks/generation_logigramme.py` - OCR/Imports: `src/app/tasks/ocr.py`, `src/app/tasks/import_plan_de_cours.py`, `src/app/tasks/import_grille.py`, `src/app/tasks/import_plan_cadre.py` +- Conversion DOCX→Schéma JSON: `src/app/tasks/docx_to_schema.py` (start `POST /docx_to_schema/start`) Tous suivent le pattern: diff --git a/requirements.txt b/requirements.txt index cb3c308c..cf520742 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,7 @@ python-dotenv pytz redis reportlab +pytest-asyncio requests starlette tiktoken diff --git a/src/app/__init__.py b/src/app/__init__.py index 23977b9d..08bab513 100644 --- a/src/app/__init__.py +++ b/src/app/__init__.py @@ -43,6 +43,7 @@ from .routes import competences_management # noqa: F401 from .routes import fil_conducteur_routes # noqa: F401 from .routes import settings_departements # noqa: F401 +from .routes import admin_docx_schema # noqa: F401 from .routes.chat import chat # Import blueprints from .routes.cours import cours_bp @@ -58,7 +59,6 @@ from .routes.api import api_bp from .routes.oauth import oauth_bp from .routes.tasks import tasks_bp -from ..mcp_server.server import init_app as init_mcp_server # Import version from ..config.version import __version__ @@ -246,6 +246,7 @@ def load_user(user_id): init_change_tracking(db) # 
Bind Flask app to MCP server for OAuth verification + from ..mcp_server.server import init_app as init_mcp_server init_mcp_server(app) if not testing: @@ -297,6 +298,15 @@ def asset_url(path: str) -> str: # Expose csrf_token() helper globally for templates return dict(has_endpoint=has_endpoint, asset_url=asset_url, csrf_token=generate_csrf) + @app.context_processor + def inject_docx_schema_pages(): + try: + from .models import DocxSchemaPage + pages = DocxSchemaPage.query.order_by(DocxSchemaPage.created_at.asc()).all() + except Exception: + pages = [] + return dict(docx_schema_pages=pages) + @app.before_request def before_request(): # Allow static files and explicitly public routes to bypass auth redirect diff --git a/src/app/forms.py b/src/app/forms.py index 2d2a8bda..edb7063b 100644 --- a/src/app/forms.py +++ b/src/app/forms.py @@ -73,6 +73,11 @@ class FileUploadForm(FlaskForm): file = FileField("Importez un fichier PDF", validators=[DataRequired()]) submit = SubmitField("Envoyer") + +class DocxToSchemaForm(FlaskForm): + file = FileField("Fichier DOCX ou PDF", validators=[DataRequired()]) + submit = SubmitField("Convertir") + class AssociateDevisForm(FlaskForm): base_filename = HiddenField(validators=[DataRequired()]) programme_id = SelectField("Choisir le Programme Cible :", coerce=int, validators=[DataRequired()]) diff --git a/src/app/models.py b/src/app/models.py index 04558c5c..c6143036 100644 --- a/src/app/models.py +++ b/src/app/models.py @@ -256,6 +256,17 @@ def get_current(cls): db.session.rollback() return obj +class DocxSchemaPage(db.Model): + """Page générée à partir d'une validation de schéma DOCX.""" + __tablename__ = 'docx_schema_pages' + + id = db.Column(db.Integer, primary_key=True) + title = db.Column(db.String(255), nullable=True) + json_schema = db.Column(db.JSON, nullable=False) + markdown_content = db.Column(db.Text, nullable=True) + created_at = db.Column(db.DateTime, default=now_utc) + + class EvaluationSavoirFaire(db.Model): __tablename__ 
= 'evaluation_savoirfaire' diff --git a/src/app/routes/admin_docx_schema.py b/src/app/routes/admin_docx_schema.py new file mode 100644 index 00000000..b673c776 --- /dev/null +++ b/src/app/routes/admin_docx_schema.py @@ -0,0 +1,178 @@ +import os +import re +import time +from flask import render_template, request, jsonify, current_app, redirect, url_for, session +from flask_login import login_required, current_user + +from ..forms import DocxToSchemaForm +from ..tasks.docx_to_schema import docx_to_json_schema_task +from ..models import db, DocxSchemaPage, SectionAISettings +from .routes import main +from ...utils.decorator import role_required, ensure_profile_completed + +DEFAULT_DOCX_TO_SCHEMA_PROMPT = ( + "Propose un schéma JSON simple, cohérent et normalisé pour représenter parfaitement ce document. " + "Retourne un objet structuré avec quatre clés : `title`, `description`, `schema` et `markdown`. " + "`schema` contient le schéma JSON complet, `markdown` une version Markdown fidèle au document. " + "Chaque champ du schéma doit inclure un titre et une description et la hiérarchie doit être respectée. " + "Ne retourne que cet objet JSON." 
+) + + +@main.route('/docx_to_schema', methods=['GET']) +@role_required('admin') +@ensure_profile_completed +def docx_to_schema_page(): + form = DocxToSchemaForm() + return render_template('settings/docx_to_schema.html', form=form) + + +@main.route('/docx_to_schema/start', methods=['POST']) +@role_required('admin') +@ensure_profile_completed +def docx_to_schema_start(): + form = DocxToSchemaForm() + if not form.validate_on_submit(): + return jsonify({'error': 'Invalid submission.', 'details': form.errors}), 400 + + file = form.file.data + if not file or not file.filename.lower().endswith(('.docx', '.pdf')): + return jsonify({'error': 'Veuillez fournir un fichier .docx ou .pdf.'}), 400 + + upload_dir = os.path.join(current_app.config.get('UPLOAD_FOLDER', 'uploads')) + os.makedirs(upload_dir, exist_ok=True) + safe_name = re.sub(r'[^A-Za-z0-9_.-]+', '_', file.filename) + stored_name = f"docx_schema_{int(time.time())}_{safe_name}" + stored_path = os.path.join(upload_dir, stored_name) + file.save(stored_path) + + sa = SectionAISettings.get_for('docx_to_schema') + model = sa.ai_model or 'gpt-4o-mini' + reasoning = sa.reasoning_effort or 'medium' + verbosity = sa.verbosity or 'medium' + system_prompt = sa.system_prompt or DEFAULT_DOCX_TO_SCHEMA_PROMPT + + task = docx_to_json_schema_task.delay(stored_path, model, reasoning, verbosity, system_prompt, current_user.id) + return jsonify({'task_id': task.id}), 202 + + +@main.route('/docx_to_schema/preview', methods=['GET', 'POST']) +@role_required('admin') +@ensure_profile_completed +def docx_to_schema_preview_temp(): + if request.method == 'POST': + data = request.get_json() or {} + schema = data.get('schema') + title = data.get('title') + description = data.get('description') + if isinstance(schema, dict): + if title and 'title' not in schema: + schema['title'] = title + if description and 'description' not in schema: + schema['description'] = description + session['pending_docx_schema'] = schema + 
session['pending_docx_markdown'] = data.get('markdown') + session['pending_docx_title'] = title + session['pending_docx_description'] = description + return jsonify({'ok': True}) + schema = session.get('pending_docx_schema') + markdown = session.get('pending_docx_markdown') + title = session.get('pending_docx_title') + description = session.get('pending_docx_description') + if not schema: + return redirect(url_for('main.docx_to_schema_page')) + return render_template('docx_schema_validate.html', schema=schema, markdown=markdown, title=title, description=description) + + +@main.route('/docx_to_schema/validate', methods=['POST']) +@role_required('admin') +@ensure_profile_completed +def docx_to_schema_validate(): + """Persiste le schéma validé et retourne l'identifiant de la nouvelle page.""" + data = request.get_json() or {} + schema = data.get('schema') + markdown = data.get('markdown') + if not schema: + return jsonify({'error': 'Schéma manquant.'}), 400 + + title = data.get('title') or schema.get('title') or schema.get('titre') or f"Schéma {int(time.time())}" + description = data.get('description') or schema.get('description') + if isinstance(schema, dict): + if title and 'title' not in schema: + schema['title'] = title + if description and 'description' not in schema: + schema['description'] = description + page = DocxSchemaPage(title=title, json_schema=schema, markdown_content=markdown) + db.session.add(page) + db.session.commit() + session.pop('pending_docx_schema', None) + session.pop('pending_docx_markdown', None) + session.pop('pending_docx_title', None) + session.pop('pending_docx_description', None) + return jsonify({'success': True, 'page_id': page.id}), 201 + +@main.route('/docx_schema', methods=['GET']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_pages(): + pages = DocxSchemaPage.query.order_by(DocxSchemaPage.created_at.desc()).all() + return render_template('docx_schema_list.html', pages=pages) + + +@main.route('/docx_schema/<int:page_id>',
methods=['GET']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_page_view(page_id): + page = DocxSchemaPage.query.get_or_404(page_id) + return render_template('docx_schema_preview.html', page=page) + + +@main.route('/docx_schema/<int:page_id>/json', methods=['GET']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_page_json(page_id): + page = DocxSchemaPage.query.get_or_404(page_id) + return render_template('docx_schema_json.html', page=page) + + +@main.route('/docx_schema/<int:page_id>/edit', methods=['POST']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_page_edit(page_id): + """Met à jour le schéma JSON d'une page existante.""" + page = DocxSchemaPage.query.get_or_404(page_id) + data = request.get_json() or {} + schema = data.get('schema') + if not schema: + return jsonify({'error': 'Schéma manquant.'}), 400 + page.json_schema = schema + page.title = schema.get('title') or schema.get('titre') or page.title + db.session.commit() + return jsonify({'success': True}) + + +@main.route('/docx_schema/<int:page_id>/rename', methods=['POST']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_page_rename(page_id): + """Met à jour uniquement le titre d'un schéma existant.""" + page = DocxSchemaPage.query.get_or_404(page_id) + data = request.get_json() or {} + title = data.get('title') + if not title: + return jsonify({'error': 'Titre manquant.'}), 400 + page.title = title + if isinstance(page.json_schema, dict): + page.json_schema['title'] = title + db.session.commit() + return jsonify({'success': True}) + + +@main.route('/docx_schema/<int:page_id>/delete', methods=['POST']) +@role_required('admin') +@ensure_profile_completed +def docx_schema_page_delete(page_id): + page = DocxSchemaPage.query.get_or_404(page_id) + db.session.delete(page) + db.session.commit() + return redirect(url_for('main.docx_schema_pages')) diff --git a/src/app/routes/routes.py b/src/app/routes/routes.py index 5fe997db..ab3b7f51 100644 --- a/src/app/routes/routes.py +++
b/src/app/routes/routes.py @@ -57,7 +57,8 @@ ElementCompetenceParCours, Cours, CoursProgramme, - ListeCegep + ListeCegep, + DocxSchemaPage ) from ...extensions import limiter from ...utils.decorator import role_required, roles_required, ensure_profile_completed @@ -80,6 +81,14 @@ def version(): from ...config.version import __version__ return jsonify({'version': __version__}) + +@main.route('/parametres') +@login_required +@ensure_profile_completed +def parametres_alias(): + docx_schemas = DocxSchemaPage.query.order_by(DocxSchemaPage.created_at.desc()).all() + return render_template('parametres.html', docx_schemas=docx_schemas) + # Public: Health endpoint @main.route('/health') @public_route diff --git a/src/app/routes/settings.py b/src/app/routes/settings.py index 109a3a98..6a67981c 100644 --- a/src/app/routes/settings.py +++ b/src/app/routes/settings.py @@ -21,6 +21,7 @@ ) from .evaluation import AISixLevelGridResponse from ...utils.decorator import role_required, roles_required, ensure_profile_completed +from .admin_docx_schema import DEFAULT_DOCX_TO_SCHEMA_PROMPT csrf = CSRFProtect() @@ -42,6 +43,7 @@ SectionAISettings, OcrPromptSettings, PlanCadreImportPromptSettings, + DocxSchemaPage, ) settings_bp = Blueprint('settings', __name__, url_prefix='/settings') @@ -828,7 +830,8 @@ def edit_plan_de_cours_prompt(prompt_id): @login_required # Cette route nécessite que l'utilisateur soit connecté @ensure_profile_completed def parametres(): - return render_template('parametres.html') + docx_schemas = DocxSchemaPage.query.order_by(DocxSchemaPage.created_at.desc()).all() + return render_template('parametres.html', docx_schemas=docx_schemas) @settings_bp.route("/gestion-plans-cours", methods=["GET"]) @roles_required('admin', 'coordo') @@ -921,3 +924,85 @@ def prompt_settings(): flash(f'Erreur lors de la mise à jour : {str(e)}', 'error') return render_template('settings/prompt_settings.html', settings=settings, ai_form=ai_form) + + 
+@settings_bp.route('/docx_to_schema_prompts', methods=['GET', 'POST']) +@login_required +@role_required('admin') +@ensure_profile_completed +def docx_to_schema_prompt_settings(): + """Configurer le prompt système et les paramètres IA pour DOCX→JSON.""" + sa = SectionAISettings.get_for('docx_to_schema') + ai_form = SectionAISettingsForm(obj=sa) + if request.method == 'GET' and not (sa.system_prompt and sa.system_prompt.strip()): + ai_form.system_prompt.data = DEFAULT_DOCX_TO_SCHEMA_PROMPT + if request.method == 'POST' and ai_form.validate_on_submit(): + sa.system_prompt = ai_form.system_prompt.data or None + sa.ai_model = ai_form.ai_model.data or None + sa.reasoning_effort = ai_form.reasoning_effort.data or None + sa.verbosity = ai_form.verbosity.data or None + db.session.commit() + flash('Paramètres enregistrés', 'success') + return redirect(url_for('settings.docx_to_schema_prompt_settings')) + return render_template('settings/docx_to_schema_prompts.html', ai_form=ai_form) + + +@settings_bp.route('/docx_schema/<int:page_id>/prompts', methods=['GET', 'POST']) +@login_required +@role_required('admin') +@ensure_profile_completed +def docx_schema_prompt_settings(page_id): + """Configurer le prompt système et les paramètres IA pour un schéma de données spécifique.""" + page = DocxSchemaPage.query.get_or_404(page_id) + + sa_gen = SectionAISettings.get_for(f'docx_schema_{page_id}') + sa_impv = SectionAISettings.get_for(f'docx_schema_{page_id}_improve') + sa_impt = SectionAISettings.get_for(f'docx_schema_{page_id}_import') + + if request.method == 'POST': + form_name = request.form.get('form_name') + target_map = { + 'gen': sa_gen, + 'impv': sa_impv, + 'impt': sa_impt, + } + target_sa = target_map.get(form_name) + form = SectionAISettingsForm(request.form, obj=target_sa) if target_sa else None + if target_sa and form and form.validate(): + target_sa.system_prompt = form.system_prompt.data or None + target_sa.ai_model = form.ai_model.data or None + target_sa.reasoning_effort =
form.reasoning_effort.data or None + target_sa.verbosity = form.verbosity.data or None + db.session.commit() + flash('Paramètres enregistrés', 'success') + return redirect(url_for('settings.docx_schema_prompt_settings', page_id=page_id)) + + default_gen = ( + "Tu es un assistant qui retourne une sortie strictement conforme au schéma JSON fourni." + ) + default_impv = ( + "Tu es un assistant qui améliore une sortie existante tout en respectant le schéma JSON fourni." + ) + default_impt = ( + "Tu es un assistant qui extrait des données d'un document et renvoie une sortie strictement conforme au schéma JSON fourni." + ) + + ai_form_gen = SectionAISettingsForm(obj=sa_gen) + ai_form_impv = SectionAISettingsForm(obj=sa_impv) + ai_form_impt = SectionAISettingsForm(obj=sa_impt) + + if request.method == 'GET': + if not (sa_gen.system_prompt or '').strip(): + ai_form_gen.system_prompt.data = default_gen + if not (sa_impv.system_prompt or '').strip(): + ai_form_impv.system_prompt.data = default_impv + if not (sa_impt.system_prompt or '').strip(): + ai_form_impt.system_prompt.data = default_impt + + return render_template( + 'settings/docx_schema_prompts.html', + ai_form_gen=ai_form_gen, + ai_form_impv=ai_form_impv, + ai_form_impt=ai_form_impt, + page=page, + ) diff --git a/src/app/tasks/__init__.py b/src/app/tasks/__init__.py index 1b76bbfb..d58b6e1a 100644 --- a/src/app/tasks/__init__.py +++ b/src/app/tasks/__init__.py @@ -7,3 +7,4 @@ from .import_plan_cadre import import_plan_cadre_preview_task from .generation_logigramme import generate_programme_logigramme_task from .generation_grille import generate_programme_grille_task +from .docx_to_schema import docx_to_json_schema_task diff --git a/src/app/tasks/docx_to_schema.py b/src/app/tasks/docx_to_schema.py new file mode 100644 index 00000000..28f2bc6b --- /dev/null +++ b/src/app/tasks/docx_to_schema.py @@ -0,0 +1,229 @@ +import os +import json +import logging +import subprocess +from typing import Optional + +from celery 
import shared_task +from docx import Document +from openai import OpenAI +from pydantic import BaseModel + +from ..models import User, db +from .import_plan_cadre import _create_pdf_from_text + +logger = logging.getLogger(__name__) + + +class DocxSchemaResponse(BaseModel): + """Structure attendue de la sortie OpenAI pour un schéma DOCX.""" + title: str + description: str + schema: str + markdown: str + + +DocxSchemaResponse.model_rebuild() + + +def _extract_reasoning_summary_from_response(response): + """Extract reasoning summary text from a Responses API result.""" + summary = "" + try: + if hasattr(response, "reasoning") and response.reasoning: + for r in response.reasoning: + for item in getattr(r, "summary", []) or []: + if getattr(item, "type", "") == "summary_text": + summary += getattr(item, "text", "") or "" + except Exception: + pass + if not summary: + try: + for out in getattr(response, "output", []) or []: + if getattr(out, "type", "") == "reasoning": + for item in getattr(out, "summary", []) or []: + if getattr(item, "type", "") == "summary_text": + summary += getattr(item, "text", "") or "" + except Exception: + pass + return summary.strip() + +def _docx_to_pdf(file_path: str) -> str: + """Ensure a PDF exists for the given file. + + If ``file_path`` already points to a PDF, it is returned as-is. Otherwise, + the function attempts to convert the DOCX file to PDF using LibreOffice and + falls back to a simple text-based PDF if the conversion fails. + Returns the path to the resulting PDF. + """ + if file_path.lower().endswith(".pdf"): + return file_path + + pdf_path = os.path.splitext(file_path)[0] + ".pdf" + outdir = os.path.dirname(file_path) or "." 
+ try: + subprocess.run( + [ + "libreoffice", + "--headless", + "--convert-to", + "pdf", + "--outdir", + outdir, + file_path, + ], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if not os.path.exists(pdf_path): + raise FileNotFoundError("PDF conversion failed") + except Exception: + try: + doc = Document(file_path) + text = "\n".join(p.text for p in doc.paragraphs if p.text.strip()) + except Exception: + text = "" + _create_pdf_from_text(text or "Document importé", pdf_path) + return pdf_path + +@shared_task(bind=True, name="app.tasks.docx_to_schema.convert") +def docx_to_json_schema_task(self, file_path: str, model: str, reasoning: str, verbosity: str, system_prompt: str, user_id: int, openai_cls=OpenAI): + """Convert an uploaded document (DOCX or PDF) to a JSON Schema using OpenAI's file API with streaming.""" + task_id = self.request.id + logger.info("[%s] Starting DOCX→Schema for %s", task_id, file_path) + + user: Optional[User] + with db.session.no_autoflush: + user = db.session.get(User, user_id) + if not user or not user.openai_key: + return {"status": "error", "message": "Clé OpenAI manquante."} + + pdf_path = _docx_to_pdf(file_path) + client = openai_cls(api_key=user.openai_key) + with open(pdf_path, "rb") as fh: + uploaded = client.files.create(file=fh, purpose="user_data") + + def push(meta): + try: + self.update_state(state="PROGRESS", meta=meta) + except Exception: + logger.exception("Failed to update task state") + + input_blocks = [ + { + "role": "system", + "content": [{"type": "input_text", "text": system_prompt}], + }, + { + "role": "user", + "content": [{"type": "input_file", "file_id": uploaded.id}], + }, + ] + request_kwargs = dict( + model=model, + input=input_blocks, + text={"verbosity": verbosity}, + reasoning={"effort": reasoning, "summary": "auto"}, + tools=[], + store=True, + text_format=DocxSchemaResponse, + ) + + streamed_text = "" + reasoning_summary_text = "" + seq = 0 + try: + with 
client.responses.stream(**request_kwargs) as stream: + for event in stream: + etype = getattr(event, "type", "") or "" + if etype.endswith("response.output_text.delta") or etype == "response.output_text.delta": + delta = getattr(event, "delta", "") or getattr(event, "text", "") or "" + if delta: + streamed_text += delta + seq += 1 + push({ + "message": "Analyse en cours...", + "stream_chunk": delta, + "stream_buffer": streamed_text, + "seq": seq, + }) + elif etype.endswith("response.reasoning_summary_text.delta") or etype == "response.reasoning_summary_text.delta": + rs_delta = getattr(event, "delta", "") or "" + if rs_delta: + reasoning_summary_text += rs_delta + push({"message": "Résumé du raisonnement", "reasoning_summary": reasoning_summary_text}) + final = stream.get_final_response() + if not reasoning_summary_text: + reasoning_summary_text = _extract_reasoning_summary_from_response(final) + if reasoning_summary_text: + push({"message": "Résumé du raisonnement", "reasoning_summary": reasoning_summary_text}) + except Exception: + final = client.responses.create(**request_kwargs) + reasoning_summary_text = _extract_reasoning_summary_from_response(final) + if reasoning_summary_text: + push({"message": "Résumé du raisonnement", "reasoning_summary": reasoning_summary_text}) + try: + text = getattr(final, "output_text", "") or "" + if text: + push({"message": "Analyse terminée", "stream_buffer": text}) + except Exception: + pass + + usage = getattr(final, "usage", None) + api_usage = { + "prompt_tokens": getattr(usage, "input_tokens", 0), + "completion_tokens": getattr(usage, "output_tokens", 0), + "model": model, + } + + parsed_obj = None + try: + op = getattr(final, "output_parsed", None) + except Exception: + op = None + if op is not None: + if hasattr(op, "model_dump"): + parsed_obj = op.model_dump() + elif isinstance(op, dict): + parsed_obj = op + if parsed_obj is None: + json_text = None + try: + json_text = getattr(final, "output_text", None) + except 
Exception: + json_text = None + if not json_text and streamed_text: + json_text = streamed_text + if json_text: + try: + parsed_obj = json.loads(json_text) + except Exception: + parsed_obj = json_text + + if isinstance(parsed_obj, dict): + title = parsed_obj.get('title') + description = parsed_obj.get('description') + schema_obj = parsed_obj.get('schema') + if isinstance(schema_obj, str): + try: + schema_obj = json.loads(schema_obj) + except Exception: + pass + markdown = parsed_obj.get('markdown', '') + if isinstance(schema_obj, dict): + if title and 'title' not in schema_obj: + schema_obj['title'] = title + if description and 'description' not in schema_obj: + schema_obj['description'] = description + result_payload = { + 'title': title, + 'description': description, + 'schema': schema_obj, + 'markdown': markdown, + } + else: + result_payload = {'title': None, 'description': None, 'schema': parsed_obj, 'markdown': ''} + + logger.info("[%s] OpenAI usage: %s", task_id, api_usage) + return {"status": "success", "result": result_payload, "api_usage": api_usage} diff --git a/src/app/templates/base.html b/src/app/templates/base.html index d2b21a9c..abcba136 100644 --- a/src/app/templates/base.html +++ b/src/app/templates/base.html @@ -325,6 +325,13 @@ Gestion de programme + {% if current_user.is_authenticated and current_user.role == 'admin' %} + {% for p in docx_schema_pages %} + + {% endfor %} + {% endif %} + +
+

+ +

+ +
+

diff --git a/src/app/templates/settings/docx_schema_prompts.html b/src/app/templates/settings/docx_schema_prompts.html new file mode 100644 index 00000000..1259e8ae --- /dev/null +++ b/src/app/templates/settings/docx_schema_prompts.html @@ -0,0 +1,103 @@ +{% extends "parametres.html" %} +{% block parametres_content %} +

{{ page.title }} – Paramètres IA

+
+
Paramètres IA
+
+ +
+
+
+ + {{ ai_form_gen.csrf_token }} +
+ + {{ ai_form_gen.system_prompt(class_='form-control font-monospace', rows=12) }} +
+
+
+ + {{ ai_form_gen.ai_model(class_='form-select') }} +
+
+ + {{ ai_form_gen.reasoning_effort(class_='form-select') }} +
+
+ + {{ ai_form_gen.verbosity(class_='form-select') }} +
+
+
+ +
+
+
+
+
+ + {{ ai_form_impv.csrf_token }} +
+ + {{ ai_form_impv.system_prompt(class_='form-control font-monospace', rows=12) }} +
+
+
+ + {{ ai_form_impv.ai_model(class_='form-select') }} +
+
+ + {{ ai_form_impv.reasoning_effort(class_='form-select') }} +
+
+ + {{ ai_form_impv.verbosity(class_='form-select') }} +
+
+
+ +
+
+
+
+
+ + {{ ai_form_impt.csrf_token }} +
+ + {{ ai_form_impt.system_prompt(class_='form-control font-monospace', rows=12) }} +
+
+
+ + {{ ai_form_impt.ai_model(class_='form-select') }} +
+
+ + {{ ai_form_impt.reasoning_effort(class_='form-select') }} +
+
+ + {{ ai_form_impt.verbosity(class_='form-select') }} +
+
+
+ +
+
+
+
+
+
+{% endblock %} diff --git a/src/app/templates/settings/docx_to_schema.html b/src/app/templates/settings/docx_to_schema.html new file mode 100644 index 00000000..206df364 --- /dev/null +++ b/src/app/templates/settings/docx_to_schema.html @@ -0,0 +1,59 @@ +{% extends "parametres.html" %} +{% block parametres_content %} +

Conversion DOCX en JSON Schema

+
+ {{ form.hidden_tag() }} +
+ {{ form.file.label(class_='form-label') }} + {{ form.file(class_='form-control') }} +
+
{{ form.submit(class_='btn btn-primary') }}
+
+{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/src/app/templates/settings/docx_to_schema_prompts.html b/src/app/templates/settings/docx_to_schema_prompts.html new file mode 100644 index 00000000..72f2dc61 --- /dev/null +++ b/src/app/templates/settings/docx_to_schema_prompts.html @@ -0,0 +1,28 @@ +{% extends "parametres.html" %} +{% block parametres_content %} +

DOCX → JSON – Paramètres IA

+
+ +
+ + {{ ai_form.system_prompt(class_='form-control font-monospace', rows=12) }} +
+
+
+ + {{ ai_form.ai_model(class_='form-select') }} +
+
+ + {{ ai_form.reasoning_effort(class_='form-select') }} +
+
+ + {{ ai_form.verbosity(class_='form-select') }} +
+
+
+ +
+
+{% endblock %} diff --git a/src/migrations/versions/3a2b1c4d5e6f_add_markdown_to_docx_schema.py b/src/migrations/versions/3a2b1c4d5e6f_add_markdown_to_docx_schema.py new file mode 100644 index 00000000..a3afebb6 --- /dev/null +++ b/src/migrations/versions/3a2b1c4d5e6f_add_markdown_to_docx_schema.py @@ -0,0 +1,21 @@ +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = '3a2b1c4d5e6f' +down_revision = 'd420453e4321' +branch_labels = None +depends_on = None + +def upgrade(): + bind = op.get_bind() + insp = sa.inspect(bind) + if 'docx_schema_pages' in insp.get_table_names(): + cols = {c['name'] for c in insp.get_columns('docx_schema_pages')} + if 'markdown_content' not in cols: + with op.batch_alter_table('docx_schema_pages', schema=None) as batch_op: + batch_op.add_column(sa.Column('markdown_content', sa.Text(), nullable=True)) + +def downgrade(): + with op.batch_alter_table('docx_schema_pages', schema=None) as batch_op: + batch_op.drop_column('markdown_content') diff --git a/src/tests/conftest.py b/src/tests/conftest.py new file mode 100644 index 00000000..fc86f42d --- /dev/null +++ b/src/tests/conftest.py @@ -0,0 +1,8 @@ +import os +import pytest + +@pytest.fixture(autouse=True) +def _set_env_defaults(): + os.environ.setdefault('SECRET_KEY', 'test') + os.environ.setdefault('RECAPTCHA_PUBLIC_KEY', 'test') + os.environ.setdefault('RECAPTCHA_PRIVATE_KEY', 'test') diff --git a/src/tests/test_asgi_sse_cors.py b/src/tests/test_asgi_sse_cors.py index 5c7a20a7..fc5c0c70 100644 --- a/src/tests/test_asgi_sse_cors.py +++ b/src/tests/test_asgi_sse_cors.py @@ -6,11 +6,11 @@ import pytest -@pytest.mark.asyncio -async def test_tasks_events_sse_includes_cors_header_direct(): - # Call the SSE handler directly to validate response headers +def test_tasks_events_sse_includes_cors_header_direct(): + # Call the SSE handler directly to validate response headers without requiring asyncio plugin from src.asgi import sse_task_events from 
starlette.requests import Request + import asyncio scope = { "type": "http", @@ -18,7 +18,7 @@ async def test_tasks_events_sse_includes_cors_header_direct(): "headers": [], } req = Request(scope) - resp = await sse_task_events(req) + resp = asyncio.run(sse_task_events(req)) assert resp.headers.get("Access-Control-Allow-Origin") == "*" diff --git a/src/utils/logging_config.py b/src/utils/logging_config.py index 7ac15de7..daf7c646 100644 --- a/src/utils/logging_config.py +++ b/src/utils/logging_config.py @@ -37,12 +37,33 @@ def format(self, record: logging.LogRecord) -> str: return f"{base} | context={context}" return base + +class SafeStreamHandler(logging.StreamHandler): + """StreamHandler that ignores writes after the stream is closed. + + During test teardown the standard streams may be closed before + application atexit handlers run. Emitting log records in that + scenario normally raises ``ValueError: I/O operation on closed file``. + This handler quietly drops such records so that logging during + shutdown does not generate noisy tracebacks. 
+ """ + + def emit(self, record: logging.LogRecord) -> None: # pragma: no cover - tiny wrapper + stream = getattr(self, "stream", None) + if not stream or getattr(stream, "closed", False): + return + try: + super().emit(record) + except Exception: + # Ignore logging errors at interpreter shutdown + pass + def setup_logging(level: int = logging.INFO) -> None: """Configure root logger with a standard format once.""" global _LOGGING_CONFIGURED if _LOGGING_CONFIGURED: return - handler = logging.StreamHandler() + handler = SafeStreamHandler() handler.setFormatter(ContextFormatter(LOG_FORMAT)) root_logger = logging.getLogger() root_logger.setLevel(level) diff --git a/tests/conftest.py b/tests/conftest.py index 14062d06..213954d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,13 @@ import pytest import sys +# Ensure required env vars for app initialization in tests +os.environ.setdefault('SECRET_KEY', 'test') +os.environ.setdefault('RECAPTCHA_PUBLIC_KEY', 'test') +os.environ.setdefault('RECAPTCHA_PRIVATE_KEY', 'test') +os.environ.setdefault('CELERY_BROKER_URL', 'memory://') +os.environ.setdefault('CELERY_RESULT_BACKEND', 'cache+memory://') + # Ensure that the application's source code is importable. 
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) from src.app import create_app, db diff --git a/tests/tasks/test_docx_to_schema_task.py b/tests/tasks/test_docx_to_schema_task.py new file mode 100644 index 00000000..85f0d091 --- /dev/null +++ b/tests/tasks/test_docx_to_schema_task.py @@ -0,0 +1,157 @@ +from pathlib import Path +from types import SimpleNamespace +import logging +import json + +from docx import Document + +from src.app.models import User, db, OpenAIModel + + +class DummySelf: + def __init__(self): + self.request = type('R', (), {'id': 'tid'})() + self.updates = [] + + def update_state(self, state=None, meta=None): + self.updates.append(meta or {}) + + +class DummyEvent: + def __init__(self, t, delta): + self.type = t + self.delta = delta + + +class FakeStream: + def __init__(self, events, final): + self.events = events + self._final = final + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + return iter(self.events) + + def get_final_response(self): + return self._final + + +class FakeResponses: + def __init__(self, output_text): + self.kwargs = None + self.output_text = output_text + + def stream(self, **kwargs): + self.kwargs = kwargs + events = [ + DummyEvent('response.output_text.delta', 'hello'), + DummyEvent('response.reasoning_summary_text.delta', 'because'), + ] + class Usage: + input_tokens = 1 + output_tokens = 2 + class Resp: + usage = Usage() + output_parsed = None + output_text = self.output_text + return FakeStream(events, Resp()) + + +class FakeFiles: + def create(self, file=None, purpose=None): # noqa: ARG002 + return type('F', (), {'id': 'fid'})() + + +class FakeOpenAI: + expected_json = None + last_instance = None + + def __init__(self, api_key=None): # noqa: ARG002 + self.files = FakeFiles() + self.responses = FakeResponses(json.dumps(FakeOpenAI.expected_json)) + FakeOpenAI.last_instance = self + + +def test_docx_to_schema_streaming(app, 
tmp_path, monkeypatch, caplog): + caplog.set_level(logging.INFO) + docx_path = tmp_path / 'test.docx' + doc = Document() + doc.add_paragraph('Hello world') + doc.save(docx_path) + + def fake_run(cmd, **kwargs): # noqa: ARG001 + assert 'libreoffice' in cmd[0] + pdf_path = Path(cmd[-1]).with_suffix('.pdf') + pdf_path.write_bytes(b'%PDF-1.4') + return SimpleNamespace(returncode=0) + + import src.app.tasks.docx_to_schema as module + monkeypatch.setattr(module, 'subprocess', SimpleNamespace(run=fake_run)) + + FakeOpenAI.expected_json = { + 'title': 'T', + 'description': 'D', + 'schema': json.dumps({'title': 'T', 'description': 'D', 'type': 'object', 'properties': {}}), + 'markdown': '# md', + } + + with app.app_context(): + user = User(username='u', password='pw', role='user', openai_key='sk', credits=1.0, is_first_connexion=False) + db.session.add(user) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + uid = user.id + + dummy = DummySelf() + orig = module.docx_to_json_schema_task.__wrapped__.__func__ + prompt = "Propose un schéma JSON simple" + result = orig(dummy, str(docx_path), 'gpt-4o-mini', 'medium', 'medium', prompt, uid, FakeOpenAI) + assert result['status'] == 'success' + assert any('stream_chunk' in u for u in dummy.updates) + assert any(u.get('stream_chunk') and u.get('message') == 'Analyse en cours...' 
for u in dummy.updates) + assert any(u.get('message') == 'Résumé du raisonnement' for u in dummy.updates) + assert result['result']['title'] == 'T' + assert result['result']['description'] == 'D' + assert result['result']['schema']['title'] == 'T' + assert result['result']['markdown'] == '# md' + called_kwargs = FakeOpenAI.last_instance.responses.kwargs + assert called_kwargs['store'] is True + assert called_kwargs['text_format'].__name__ == 'DocxSchemaResponse' + assert 'Propose un schéma JSON simple' in called_kwargs['input'][0]['content'][0]['text'] + assert 'OpenAI usage' in caplog.text + + +def test_docx_to_schema_with_pdf_input(app, tmp_path, monkeypatch): + pdf_path = tmp_path / 'test.pdf' + pdf_path.write_bytes(b'%PDF-1.4') + + def fake_run(cmd, **kwargs): # noqa: ARG001 + raise AssertionError('libreoffice should not run for PDF input') + + import src.app.tasks.docx_to_schema as module + monkeypatch.setattr(module, 'subprocess', SimpleNamespace(run=fake_run)) + + FakeOpenAI.expected_json = { + 'title': 'T', + 'description': 'D', + 'schema': json.dumps({'title': 'T', 'description': 'D', 'type': 'object', 'properties': {}}), + 'markdown': '# md', + } + + with app.app_context(): + user = User(username='u2', password='pw', role='user', openai_key='sk', credits=1.0, is_first_connexion=False) + db.session.add(user) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + uid = user.id + + dummy = DummySelf() + orig = module.docx_to_json_schema_task.__wrapped__.__func__ + prompt = "Propose un schéma JSON simple" + result = orig(dummy, str(pdf_path), 'gpt-4o-mini', 'medium', 'medium', prompt, uid, FakeOpenAI) + assert result['status'] == 'success' diff --git a/tests/test_docx_schema_prompts.py b/tests/test_docx_schema_prompts.py new file mode 100644 index 00000000..9002ab3e --- /dev/null +++ b/tests/test_docx_schema_prompts.py @@ -0,0 +1,61 @@ +from werkzeug.security import generate_password_hash +from 
src.app.models import User, db, OpenAIModel, SectionAISettings + + +def _login(client, user_id): + with client.session_transaction() as sess: + sess['_user_id'] = str(user_id) + sess['_fresh'] = True + + +def test_docx_schema_prompts_page(app, client): + with app.app_context(): + admin = User( + username='admins', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + resp = client.post('/docx_to_schema/validate', json={'schema': {'title': 'Test', 'type': 'object'}, 'markdown': '# md'}) + page_id = resp.get_json()['page_id'] + resp = client.get('/parametres') + assert f'/settings/docx_schema/{page_id}/prompts'.encode() in resp.data + + # La page de paramètres doit afficher les trois onglets + resp = client.get(f'/settings/docx_schema/{page_id}/prompts') + assert resp.status_code == 200 + for label in (b'G\xc3\xa9n\xc3\xa9ration', b'Am\xc3\xa9lioration', b'Importation'): + assert label in resp.data + + # Soumettre chaque formulaire et vérifier l'enregistrement + for form_name, prompt_text in ( + ('gen', 'Gen'), + ('impv', 'Impv'), + ('impt', 'Impt'), + ): + resp = client.post( + f'/settings/docx_schema/{page_id}/prompts', + data={ + 'form_name': form_name, + 'system_prompt': prompt_text, + 'ai_model': '', + 'reasoning_effort': '', + 'verbosity': '' + }, + follow_redirects=True + ) + assert resp.status_code == 200 + + with app.app_context(): + sa_gen = SectionAISettings.query.filter_by(section=f'docx_schema_{page_id}').first() + sa_impv = SectionAISettings.query.filter_by(section=f'docx_schema_{page_id}_improve').first() + sa_impt = SectionAISettings.query.filter_by(section=f'docx_schema_{page_id}_import').first() + assert sa_gen and sa_gen.system_prompt == 'Gen' + assert sa_impv and sa_impv.system_prompt == 'Impv' + assert sa_impt and 
sa_impt.system_prompt == 'Impt' diff --git a/tests/test_docx_to_schema_csrf.py b/tests/test_docx_to_schema_csrf.py new file mode 100644 index 00000000..323f1fe9 --- /dev/null +++ b/tests/test_docx_to_schema_csrf.py @@ -0,0 +1,141 @@ +from io import BytesIO +from bs4 import BeautifulSoup +from werkzeug.security import generate_password_hash + +from src.app.models import User, db, OpenAIModel, DocxSchemaPage +import src.app.tasks.docx_to_schema as docx_tasks + + +def _login(client, user_id): + with client.session_transaction() as sess: + sess['_user_id'] = str(user_id) + sess['_fresh'] = True + + +def test_docx_to_schema_requires_csrf(app, client, monkeypatch): + with app.app_context(): + admin = User( + username='admin', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + + _login(client, admin_id) + app.config['WTF_CSRF_ENABLED'] = True + + # Patch Celery task delay to avoid running Celery + class Dummy: + id = 'tid' + monkeypatch.setattr( + docx_tasks.docx_to_json_schema_task, + 'delay', + lambda *a, **k: Dummy() + ) + + data = { + 'file': (BytesIO(b'hi'), 'test.docx'), + } + # Missing CSRF token + resp = client.post( + '/docx_to_schema/start', + data=data, + content_type='multipart/form-data' + ) + assert resp.status_code == 400 + + # Fetch page to get token + page = client.get('/docx_to_schema') + soup = BeautifulSoup(page.data, 'html.parser') + token = soup.find('input', {'name': 'csrf_token'})['value'] + + data = { + 'csrf_token': token, + 'file': (BytesIO(b'hi'), 'test.docx'), + } + resp2 = client.post( + '/docx_to_schema/start', + data=data, + content_type='multipart/form-data' + ) + assert resp2.status_code == 202 + assert resp2.get_json()['task_id'] == 'tid' + + +def test_docx_schema_rename_requires_csrf(app, client): + with app.app_context(): + 
admin = User( + username='renamer', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + page = DocxSchemaPage(title='Old', json_schema={'title': 'Old', 'type': 'object'}) + db.session.add_all([admin, page]) + db.session.commit() + admin_id = admin.id + page_id = page.id + + _login(client, admin_id) + app.config['WTF_CSRF_ENABLED'] = True + + # Missing token + resp = client.post(f'/docx_schema/{page_id}/rename', json={'title': 'New'}) + assert resp.status_code == 400 + + listing = client.get('/docx_schema') + soup = BeautifulSoup(listing.data, 'html.parser') + token = soup.find('meta', {'name': 'csrf-token'})['content'] + + resp_ok = client.post( + f'/docx_schema/{page_id}/rename', + json={'title': 'Renamed'}, + headers={'X-CSRFToken': token}, + ) + assert resp_ok.status_code == 200 + assert resp_ok.get_json()['success'] is True + with app.app_context(): + assert db.session.get(DocxSchemaPage, page_id).title == 'Renamed' + + +def test_docx_to_schema_accepts_pdf(app, client, monkeypatch): + with app.app_context(): + admin = User( + username='admin_pdf', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + + _login(client, admin_id) + + class Dummy: + id = 'tid' + + monkeypatch.setattr( + docx_tasks.docx_to_json_schema_task, + 'delay', + lambda *a, **k: Dummy(), + ) + + data = { + 'file': (BytesIO(b'%PDF-1.4', ), 'test.pdf'), + } + resp = client.post( + '/docx_to_schema/start', + data=data, + content_type='multipart/form-data', + ) + assert resp.status_code == 202 + assert resp.get_json()['task_id'] == 'tid' diff --git a/tests/test_docx_to_schema_ui.py b/tests/test_docx_to_schema_ui.py new file mode 100644 index 00000000..48766326 --- /dev/null +++ b/tests/test_docx_to_schema_ui.py @@ -0,0 +1,443 @@ +from 
werkzeug.security import generate_password_hash +from src.app.models import User, db, OpenAIModel +from html import unescape + + +def _login(client, user_id): + with client.session_transaction() as sess: + sess['_user_id'] = str(user_id) + sess['_fresh'] = True + + +def test_docx_to_schema_page_contains_start_endpoint(app, client): + with app.app_context(): + admin = User( + username='admin', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + resp = client.get('/docx_to_schema') + assert resp.status_code == 200 + data = resp.data + assert b'/docx_to_schema/start' in data + assert b'name="model"' not in data + assert b'name="reasoning_level"' not in data + assert b'name="verbosity"' not in data + assert b'onDone' in data + + +def test_docx_to_schema_preview_page(app, client): + with app.app_context(): + admin = User( + username='adminp', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + schema = {'title': 'Preview', 'type': 'object'} + client.post('/docx_to_schema/preview', json={'schema': schema, 'markdown': '# Titre\nContenu', 'title': 'Preview', 'description': 'Desc'}) + resp = client.get('/docx_to_schema/preview') + assert resp.status_code == 200 + assert b'id="schemaAccordion"' in resp.data + assert b'id="schemaValidateBtn"' in resp.data + assert b'id="schemaResultMarkdown"' in resp.data + assert b'zoom.transform' in resp.data + + +def test_parametres_page_has_docx_conversion_links(app, client): + with app.app_context(): + admin = User( + username='admin2', + 
password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + resp = client.get('/parametres') + assert resp.status_code == 200 + assert b'/docx_to_schema' in resp.data + assert b'/settings/docx_to_schema_prompts' in resp.data + assert b'/docx_schema"' in resp.data + + +def test_docx_to_schema_validate_endpoint(app, client): + with app.app_context(): + admin = User( + username='admin3', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + payload = {'schema': {'type': 'object'}, 'markdown': '# md', 'title': 'Sample', 'description': 'Desc'} + resp = client.post('/docx_to_schema/validate', json=payload) + assert resp.status_code == 201 + data = resp.get_json() + assert data['success'] is True + page_id = data['page_id'] + # Page accessible + resp = client.get(f'/docx_schema/{page_id}') + assert resp.status_code == 200 + assert b'Sample' in resp.data + assert b'id="planCadreForm"' in resp.data + assert b'id="actionBar"' in resp.data + assert b'id="planCadreAccordion"' in resp.data + assert b'id="schemaEditBtn"' not in resp.data + assert b'id="schemaResultMarkdown"' not in resp.data + assert b'id="schemaAccordion"' not in resp.data + # JSON page now holds preview details + resp_json = client.get(f'/docx_schema/{page_id}/json') + assert resp_json.status_code == 200 + assert b'id="schemaEditBtn"' in resp_json.data + assert b'id="schemaResultMarkdown"' in resp_json.data + assert b'id="schemaAccordion"' in resp_json.data + assert b'd3.tree' in resp_json.data + assert b'd3.drag' in resp_json.data + assert b'legend' in 
resp_json.data + assert b"n.type === 'object' && n.properties" in resp_json.data + assert b'zoom.transform' in resp_json.data + + +def test_navbar_updates_with_schema_links(app, client): + with app.app_context(): + admin = User( + username='admin4', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + resp = client.get('/parametres') + assert b'/docx_schema/' not in resp.data + resp = client.post('/docx_to_schema/validate', json={'schema': {'title': 'Link', 'type': 'object'}, 'markdown': '# md'}) + page_id = resp.get_json()['page_id'] + resp = client.get('/parametres') + assert f'/docx_schema/{page_id}'.encode() in resp.data + + +def test_docx_schema_management(app, client): + with app.app_context(): + admin = User( + username='admin6', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + # Create schema page + resp = client.post('/docx_to_schema/validate', json={'schema': {'title': 'Manage', 'type': 'object'}, 'markdown': '# md'}) + page_id = resp.get_json()['page_id'] + # List page includes it + resp = client.get('/docx_schema') + assert b'>Manage<' in resp.data + # Rename schema + resp = client.post(f'/docx_schema/{page_id}/rename', json={'title': 'Renamed'}) + assert resp.status_code == 200 + resp = client.get('/docx_schema') + assert b'>Renamed<' in resp.data + assert b'>Manage<' not in resp.data + # Edit schema + resp = client.post(f'/docx_schema/{page_id}/edit', json={'schema': {'title': 'Updated', 'type': 'object'}}) + assert resp.status_code == 200 + resp = 
client.get(f'/docx_schema/{page_id}') + assert b'Updated' in resp.data + assert b'id="schemaResultMarkdown"' not in resp.data + resp_json = client.get(f'/docx_schema/{page_id}/json') + assert b'Updated' in resp_json.data + assert b'id="schemaResultMarkdown"' in resp_json.data + # Delete + resp = client.post(f'/docx_schema/{page_id}/delete') + assert resp.status_code == 302 + resp = client.get('/docx_schema') + assert b'>Updated<' not in resp.data + + +def test_docx_to_schema_prompts_page(app, client): + with app.app_context(): + admin = User( + username='admin5', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + resp = client.get('/settings/docx_to_schema_prompts') + assert resp.status_code == 200 + assert b'DOCX \xe2\x86\x92 JSON' in resp.data + assert b'Prompt syst' in resp.data + assert b'Mod' in resp.data + assert b'Niveau de raisonnement' in resp.data + assert b'Verbosit' in resp.data + assert 'Propose un schéma JSON simple'.encode('utf-8') in resp.data + + +def test_docx_schema_preview_buttons_and_lists(app, client): + with app.app_context(): + admin = User( + username='array_admin', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + schema = { + 'title': 'ArraySample', + 'type': 'object', + 'properties': { + 'items': { + 'type': 'array', + 'items': {'type': 'string', 'title': 'It'} + } + } + } + resp = client.post('/docx_to_schema/validate', json={'schema': schema, 'markdown': '# md\n- a'}) + assert resp.status_code == 201 + page_id = resp.get_json()['page_id'] + resp = 
client.get(f'/docx_schema/{page_id}') + data = resp.data + assert b'id="schemaImportBtn"' in data + assert b'id="schemaImproveBtn"' in data + assert b'id="schemaGenerateBtn"' in data + assert b'id="schemaExportBtn"' in data + # Array/list controls are now on the JSON page + resp_json = client.get(f'/docx_schema/{page_id}/json') + json_data = resp_json.data + assert b'add-array-item' in json_data + assert b'add-list-item' in json_data + + +def test_docx_schema_preview_plan_form(app, client): + with app.app_context(): + admin = User( + username='plan_admin', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + schema = { + 'title': 'Form', + 'type': 'object', + 'properties': { + 'name': {'type': 'string', 'title': 'Nom'}, + 'tags': {'type': 'array', 'items': {'type': 'string'}} + } + } + resp = client.post('/docx_to_schema/validate', json={'schema': schema, 'markdown': '# md'}) + assert resp.status_code == 201 + page_id = resp.get_json()['page_id'] + resp = client.get(f'/docx_schema/{page_id}') + data = resp.data + assert b'id="planCadreForm"' in data + assert b'id="actionBar"' in data + assert b'id="floatingSaveBtn"' in data + assert b'add-form-array-item' in data + + +def test_docx_schema_preview_plan_form_order_and_nested(app, client): + with app.app_context(): + admin = User( + username='plan_order_admin', + password=generate_password_hash('pw'), + role='admin', + is_first_connexion=False, + openai_key='sk' + ) + db.session.add(admin) + db.session.add(OpenAIModel(name='gpt-4o-mini', input_price=0.0, output_price=0.0)) + db.session.commit() + admin_id = admin.id + _login(client, admin_id) + schema = { + 'title': 'Form', + 'type': 'object', + 'properties': { + 'summary': {'type': 'string', 'title': 'Résumé'}, + 'section': { + 
'title': 'Section', + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'title': {'type': 'string', 'title': 'Titre'}, + 'notes': {'type': 'array', 'items': {'type': 'string', 'title': 'Note'}} + } + } + } + } + } + markdown = '## Section\n- Titre\n- Note\n## Résumé' + resp = client.post('/docx_to_schema/validate', json={'schema': schema, 'markdown': markdown}) + assert resp.status_code == 201 + page_id = resp.get_json()['page_id'] + resp = client.get(f'/docx_schema/{page_id}') + data = unescape(resp.data.decode('utf-8')) + assert 'markdownOrderMap' in data + assert 'buildMarkdownOrderMap' in data + assert 'markdownOrderMap = buildMarkdownOrderMap(markdownData);' in data + assert 'path.replace(/\\[[0-9]+\\]/g, \'\')' in data + assert 'getMdOrder(' in data + assert 'getMarkdownIndex' in data + assert 'normalizedMarkdown' in data + assert 'position-absolute top-0 end-0 remove-form-array-item' in data + assert 'normalizeName(' in data + + +def test_markdown_plain_text_ordering(): + import unicodedata, re + + def normalize_name(s): + s = unicodedata.normalize('NFD', s or '') + s = ''.join(c for c in s if unicodedata.category(c) != 'Mn') + s = re.sub(r'[^a-zA-Z0-9]+', ' ', s) + return s.strip().lower() + + def sort_by_markdown(schema, markdown): + norm_md = normalize_name(markdown) + entries = list(schema['properties'].items()) + def position(item): + key, val = item + name = normalize_name(val.get('title') or key) + idx = norm_md.find(name) + return idx if idx != -1 else float('inf') + entries.sort(key=position) + return [k for k, _ in entries] + + schema = { + 'title': 'Plain', + 'type': 'object', + 'properties': { + 'first': {'type': 'string', 'title': 'Premier'}, + 'second': {'type': 'string', 'title': 'Deuxième'}, + } + } + markdown = 'Deuxième\nPremier' + ordered = sort_by_markdown(schema, markdown) + assert ordered == ['second', 'first'] + + +def test_markdown_nested_array_ordering(): + import unicodedata, re + + def normalize_name(s): + s = 
unicodedata.normalize('NFD', s or '') + s = ''.join(c for c in s if unicodedata.category(c) != 'Mn') + s = re.sub(r'[^a-zA-Z0-9]+', ' ', s) + return s.strip().lower() + + def build_markdown_order_map(markdown): + lines = markdown.splitlines() + order = {'root': []} + headings = [] + list_stack = [] + for line in lines: + m = re.match(r'^(#+)\s+(.*)', line) + if m: + depth = len(m.group(1)) + text = normalize_name(m.group(2)) + if len(headings) < depth - 1: + headings.extend([None] * (depth - 1 - len(headings))) + headings = headings[:depth - 1] + headings.append(text) + list_stack = [] + parent = '.'.join(h for h in headings[:-1] if h) or 'root' + order.setdefault(parent, []).append(text) + continue + m = re.match(r'^(\s*)[-*+]\s+(.*)', line) + if m: + indent = len(m.group(1)) // 2 + text = normalize_name(m.group(2)) + list_stack = list_stack[:indent] + parent = '.'.join(h for h in headings + list_stack if h) or 'root' + order.setdefault(parent, []).append(text) + list_stack.append(text) + return order + + def sort_props(props, md_map, path): + entries = list(props.items()) + md_order = md_map.get(path, []) + def pos(item): + key, val = item + name = normalize_name(val.get('title') or key) + try: + return md_order.index(name) + except ValueError: + return float('inf') + entries.sort(key=pos) + return [k for k, _ in entries] + + schema = { + 'title': 'Root', + 'type': 'object', + 'properties': { + 'section': { + 'title': 'Section', + 'type': 'array', + 'items': { + 'title': 'Element', + 'type': 'object', + 'properties': { + 'title': {'type': 'string', 'title': 'Titre'}, + 'note': {'type': 'string', 'title': 'Note'}, + } + } + }, + 'summary': {'type': 'string', 'title': 'Résumé'} + } + } + + markdown = '## Section\n- Element\n - Titre\n - Note\n## Résumé' + md_map = build_markdown_order_map(markdown) + root_order = sort_props(schema['properties'], md_map, 'root') + assert root_order == ['section', 'summary'] + item_props = 
schema['properties']['section']['items']['properties'] + nested_order = sort_props(item_props, md_map, 'section.element') + assert nested_order == ['title', 'note'] diff --git a/tests/test_normalize_plan_schema.py b/tests/test_normalize_plan_schema.py new file mode 100644 index 00000000..f24e7763 --- /dev/null +++ b/tests/test_normalize_plan_schema.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +def normalize_plan_schema_py(node): + if not isinstance(node, dict): + return node + # allow further processing even if 'type' is present; only skip if already normalized + if isinstance(node, dict) and ('properties' in node or ('items' in node and isinstance(node['items'], dict) and 'type' in node['items'])): + return node + keys = list(node.keys()) + if not any(k in node for k in ('fields', 'parts', 'champs', 'element', 'type')) and len(keys) == 1 and keys[0] not in ('title', 'description'): + key = keys[0] + inner = normalize_plan_schema_py(node[key]) + if isinstance(inner, dict) and 'title' not in inner: + inner['title'] = key + return inner + if 'parts' in node: + props = {k: normalize_plan_schema_py(v) for k, v in node['parts'].items()} + out = {'type': 'object', 'properties': props} + if 'title' in node: + out['title'] = node['title'] + if 'description' in node: + out['description'] = node['description'] + return out + if 'fields' in node: + props = {k: normalize_plan_schema_py(v) for k, v in node['fields'].items()} + out = {'type': 'object', 'properties': props} + if 'title' in node: + out['title'] = node['title'] + if 'description' in node: + out['description'] = node['description'] + return out + if node.get('type') == 'array': + items = node.get('items') + if isinstance(items, dict) and not any(k in items for k in ('type', 'properties', 'items')): + item_props = {ik: {'type': iv if isinstance(iv, str) else 'string'} for ik, iv in items.items()} + items = {'type': 'object', 'properties': item_props} + else: + items = normalize_plan_schema_py(items) if 
items else {} + out = {'type': 'array', 'items': items} + if 'title' in node: + out['title'] = node['title'] + if 'description' in node: + out['description'] = node['description'] + return out + out = {'type': node.get('type', 'string')} + if 'title' in node: + out['title'] = node['title'] + if 'description' in node: + out['description'] = node['description'] + return out + + +def test_normalize_plan_schema_handles_parts_and_fields(): + raw = { + "plan_cadre": { + "title": "Plan-cadre", + "parts": { + "partie_1": { + "title": "Partie 1", + "fields": { + "programme": {"title": "Programme"}, + "competences": { + "title": "Compétences", + "type": "array", + "items": { + "code": "string", + "enonce": "string" + } + } + } + } + } + } + } + norm = normalize_plan_schema_py(raw) + assert norm["type"] == "object" + assert "partie_1" in norm["properties"] + p1 = norm["properties"]["partie_1"] + assert p1["type"] == "object" + assert "programme" in p1["properties"] + assert p1["properties"]["programme"]["type"] == "string" + comp = p1["properties"]["competences"] + assert comp["type"] == "array" + assert comp["items"]["type"] == "object" + assert "code" in comp["items"]["properties"]