From 905a99ca05449aa14a9af2f4098f85afb6cfb8ab Mon Sep 17 00:00:00 2001 From: DanielFoster88 Date: Sat, 16 May 2026 12:23:54 +0800 Subject: [PATCH] fix: include character count and limit in 'Document is too long' error Previously, when a paper's LaTeX source exceeded MAX_LATEX_CHARACTER_COUNT (300,000 characters), the error message was just 'Document is too long', with no context. The message now includes: - The actual character count of the document - The configured limit - A suggestion to use a shorter version of the paper or split it into sections This helps users understand why their paper was rejected and what they can do about it. Fixes #130 --- functions/import_pipeline/import_pipeline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/functions/import_pipeline/import_pipeline.py b/functions/import_pipeline/import_pipeline.py index d58d2a6f..a1d37206 100644 --- a/functions/import_pipeline/import_pipeline.py +++ b/functions/import_pipeline/import_pipeline.py @@ -103,7 +103,12 @@ def import_arxiv_latex_and_pdf( raise if len(latex_string) > MAX_LATEX_CHARACTER_COUNT: - raise ValueError(f"Document is too long") + raise ValueError( + f"Document is too long: {len(latex_string):,} characters " + f"(limit is {MAX_LATEX_CHARACTER_COUNT:,}). " + f"Consider using a shorter version of the paper or " + f"splitting it into sections." + ) if existing_model_output_file: with open(existing_model_output_file, "r") as file: