diff --git a/api/__pycache__/settings.cpython-313.pyc b/api/__pycache__/settings.cpython-313.pyc index 23f6b64e..a9a5fcb1 100644 Binary files a/api/__pycache__/settings.cpython-313.pyc and b/api/__pycache__/settings.cpython-313.pyc differ diff --git a/app/__pycache__/document_generator.cpython-313.pyc b/app/__pycache__/document_generator.cpython-313.pyc index b141b67c..3fd3fcf4 100644 Binary files a/app/__pycache__/document_generator.cpython-313.pyc and b/app/__pycache__/document_generator.cpython-313.pyc differ diff --git a/app/document_generator.py b/app/document_generator.py index d8f80dd0..882d44bc 100644 --- a/app/document_generator.py +++ b/app/document_generator.py @@ -173,6 +173,182 @@ def insert_image_after_keyword(doc, keywords, image_path, full_name, time): # ============================================================================= +import os +import re +import subprocess +from datetime import datetime +from django.db import models +import numpy as np +from docx import Document +from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.shared import Inches, Pt +from django.apps import apps +from num2words import num2words +from django.conf import settings +from app.models import Document_Configuration + +# ============================================================================= +# Constants +# ============================================================================= +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +static_folder = os.path.join(settings.BASE_DIR, "static") + + +# ============================================================================= +# Utility Functions +# ============================================================================= + +def replace_text_in_doc(doc, old_text, new_text): + """Thay thế tất cả các lần xuất hiện của old_text bằng new_text trong tài liệu, xử lý split run.""" + new_text = str(new_text) if new_text is not None else "" + + def replace_in_paragraph(para): + runs = list(para.runs) + full_text = ''.join(run.text for run in runs) + if old_text not in full_text: + return + + start_idx = full_text.find(old_text) + if start_idx == -1: + return + + current_pos = 0 + runs_to_modify = [] + for run in runs: + run_len = len(run.text) + run_start = current_pos + run_end = current_pos + run_len + current_pos = run_end + + if run_start < start_idx + len(old_text) and run_end > start_idx: + runs_to_modify.append(run) + + if not runs_to_modify: + return + + first_run = runs_to_modify[0] + first_run_index = next(i for i, r in enumerate(runs) if r is first_run) + + local_start = start_idx - sum(len(runs[i].text) for i in range(first_run_index)) + + remaining_old = old_text + + for i, run in enumerate(runs_to_modify): + run_text = run.text + if i == 0: + prefix = run_text[:local_start] + remove_len = min(len(remaining_old), len(run_text) - local_start) + suffix = run_text[local_start + remove_len:] + run.text = prefix + suffix + remaining_old = remaining_old[remove_len:] + else: + remove_len = min(len(remaining_old), len(run_text)) + suffix = run_text[remove_len:] + run.text = suffix + remaining_old = remaining_old[remove_len:] + + first_run = runs_to_modify[0] + first_run.text = first_run.text[:local_start] + new_text + first_run.text[local_start:] + + replace_in_paragraph(para) + + for para in doc.paragraphs: + replace_in_paragraph(para) + + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for para in cell.paragraphs: + replace_in_paragraph(para) + + for section in doc.sections: + footer = section.footer + for para in footer.paragraphs: + if any("PAGE" in run._element.xml for run in para.runs): + continue + replace_in_paragraph(para) + + +def docx_to_pdf(input_path, output_dir=None): + """Converts a .docx file to .pdf using LibreOffice, handling non-zero exit codes gracefully.""" + if output_dir is None: + output_dir = os.path.dirname(os.path.abspath(input_path)) + + pdf_path = os.path.join(output_dir, os.path.basename(input_path).replace(".docx", ".pdf")) + + try: + result = subprocess.run( + [ + "libreoffice", + "--headless", + "--convert-to", + "pdf", + "--outdir", + output_dir, + input_path, + ], + timeout=60, + capture_output=True, + text=True, + ) + + if result.returncode != 0: + # Log the warning/error from LibreOffice + print(f"WARNING: libreoffice command returned non-zero exit code ({result.returncode}) for {input_path}.") + print(f" STDOUT: {result.stdout}") + print(f" STDERR: {result.stderr}") + + # Check if the PDF was created anyway + if not os.path.exists(pdf_path) or os.path.getsize(pdf_path) == 0: + # This is a real failure + raise Exception(f"PDF conversion failed and output file was not created. STDERR: {result.stderr}") + else: + print(f"INFO: PDF file was created successfully despite the non-zero exit code.") + + except FileNotFoundError: + print("ERROR: libreoffice command not found. Please ensure it is installed and in your PATH.") + raise + except Exception as e: + # Re-raise other exceptions (like timeout) + print(f"ERROR: An unexpected error occurred during PDF conversion for {input_path}. Error: {e}") + raise + + +def insert_image_after_keyword(doc, keywords, image_path, full_name, time): + """Finds a keyword in a table and inserts an image and text after it.""" + if not os.path.exists(image_path): + print(f"==INSERT IMAGE ERROR== File not found: {image_path}") + return + + try: + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for para in cell.paragraphs: + for keyword in keywords: + if keyword in para.text: + p_img = cell.add_paragraph() + p_img.alignment = WD_ALIGN_PARAGRAPH.CENTER + p_img.add_run().add_picture(image_path, width=Inches(1.5)) + + p_name = cell.add_paragraph() + p_name.alignment = WD_ALIGN_PARAGRAPH.CENTER + run_name = p_name.add_run(full_name) + run_name.bold = True + + p_time = cell.add_paragraph() + p_time.alignment = WD_ALIGN_PARAGRAPH.CENTER + p_time.add_run(time) + return + except Exception as e: + print(f"==INSERT IMAGE ERROR== {e}") + + +# ============================================================================= +# Document Generator Class +# ============================================================================= + + class DocumentGenerator: def __init__(self, document_code, context_pks: dict): self.document_code = document_code @@ -356,67 +532,153 @@ class DocumentGenerator: val = apply_format(val, cur_fmt, obj) return str(val) - def prepare_replacements(self): - # Set base date replacements + def _scan_placeholders_in_doc(self, doc): + """Scans the entire document and returns a set of unique placeholders.""" + placeholders = set() + pattern = re.compile(r'\[([^\[\]]+)\]') + + def scan_paragraph(para): + full_text = ''.join(run.text for run in para.runs) + for match in pattern.finditer(full_text): + placeholders.add(f"[{match.group(1)}]") + + for para in doc.paragraphs: + scan_paragraph(para) + + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for para in cell.paragraphs: + scan_paragraph(para) + + for section in doc.sections: + footer = section.footer + for para in footer.paragraphs: + if any("PAGE" in run._element.xml for run in para.runs): + continue + scan_paragraph(para) + + return placeholders + + def _parse_format_args(self, args_string): + """Parses a string like 'lang:vi, type:number_to_words' into a dictionary.""" + if not args_string: + return {} + format_config = {} + args = args_string.split(',') + for arg in args: + if ':' in arg: + key, value = arg.split(':', 1) + format_config[key.strip()] = value.strip() + return format_config + + def prepare_replacements(self, doc): + """ + Prepares all replacements by implementing a hybrid approach: + 1. Prioritizes manual configuration from 'fields'. + 2. Automatically handles any remaining dynamic placeholders. + """ today = datetime.now() self.replacements['[day]'] = str(today.day) self.replacements['[month]'] = str(today.month) self.replacements['[year]'] = str(today.year) self.replacements['[date]'] = today.strftime("%d/%m/%Y") - for mapping in self.config.mappings: - alias = mapping["alias"] - data = self.data_context.get(alias) + placeholders_in_doc = self._scan_placeholders_in_doc(doc) - if mapping["type"] == "object": - if data is None: - for placeholder in mapping["fields"]: - self.replacements[placeholder] = "" + # PASS 1: Handle manual/explicit configuration (backward compatibility) + if isinstance(self.config.mappings, list): + for mapping in self.config.mappings: + if "fields" not in mapping: continue - for placeholder, config in mapping["fields"].items(): - if isinstance(config, dict): - value = self._get_value_from_object(data, config["source"]) - self.replacements[placeholder] = self._format_value(value, config["format"], data) - else: - value = self._get_value_from_object(data, config) - self.replacements[placeholder] = str(value) if value is not None else "" - elif mapping["type"] == "list": - items = data or [] - max_items = mapping.get("max_items", 4) - for i in range(max_items): - item = items[i] if i < len(items) else None - for p_template, config in mapping["fields"].items(): - placeholder = p_template.replace("{index}", str(i + 1)) - if item is None: - self.replacements[placeholder] = "" - continue - if isinstance(config, dict): - value = self._get_value_from_object(item, config["source"]) - self.replacements[placeholder] = self._format_value(value, config["format"], item) - else: - value = self._get_value_from_object(item, config) - self.replacements[placeholder] = str(value) if value is not None else "" + alias = mapping["alias"] + data = self.data_context.get(alias) + + if mapping["type"] == "list": + items = data or [] + max_items = mapping.get("max_items", 4) + for i in range(max_items): + item = items[i] if i < len(items) else None + for p_template, config in mapping["fields"].items(): + placeholder = p_template.replace("{index}", str(i + 1)) + if placeholder in placeholders_in_doc: + if item is None: + self.replacements[placeholder] = "" + else: + if isinstance(config, dict): + value = self._get_value_from_object(item, config["source"]) + self.replacements[placeholder] = self._format_value(value, config["format"], item) + else: + value = self._get_value_from_object(item, config) + self.replacements[placeholder] = str(value) if value is not None else "" + placeholders_in_doc.discard(placeholder) + + elif mapping["type"] == "object": + if data is None: + for placeholder in mapping["fields"]: + if placeholder in placeholders_in_doc: + self.replacements[placeholder] = "" + placeholders_in_doc.discard(placeholder) + continue + + for placeholder, config in mapping["fields"].items(): + if placeholder in placeholders_in_doc: + if isinstance(config, dict): + value = self._get_value_from_object(data, config["source"]) + self.replacements[placeholder] = self._format_value(value, config["format"], data) + else: + value = self._get_value_from_object(data, config) + self.replacements[placeholder] = str(value) if value is not None else "" + placeholders_in_doc.discard(placeholder) + + # PASS 2: Handle remaining dynamic placeholders + dynamic_pattern = re.compile(r'\[([a-zA-Z0-9_]+\.[a-zA-Z0-9_.]*)(?:\((.*?)\))?\]') + + for placeholder in list(placeholders_in_doc): + match = dynamic_pattern.fullmatch(placeholder) + if not match: + continue + + data_path, format_args_str = match.groups() + + try: + alias, field_path = data_path.split('.', 1) + + if alias not in self.data_context: + self.replacements[placeholder] = f"[ALIAS_NOT_FOUND: {alias}]" + continue + + source_object = self.data_context.get(alias) + value = self._get_value_from_object(source_object, field_path) + + if format_args_str: + format_config = self._parse_format_args(format_args_str) + self.replacements[placeholder] = self._format_value(value, format_config, source_object) + else: + self.replacements[placeholder] = str(value) if value is not None else "" + + except Exception as e: + self.replacements[placeholder] = f"[ERROR: {e}]" def generate(self, signature_info=None, output_filename=None): self.fetch_data() - self.prepare_replacements() - # Remove leading slashes from template_path to prevent os.path.join issues clean_template_path = self.config.template_path.lstrip('/') template_full_path = os.path.join(static_folder, clean_template_path) if not os.path.exists(template_full_path): raise FileNotFoundError(f"Template file not found at: {template_full_path}") - # --- FILENAME LOGIC --- + doc = Document(template_full_path) + + self.prepare_replacements(doc) + if output_filename: - # Use user-provided filename, ensure it has the correct extension if not output_filename.endswith(".docx"): base_name = os.path.splitext(output_filename)[0] output_filename = f"{base_name}.docx" else: - # Use a more descriptive output filename (original logic) - pk_values = "_".join(self.context_pks.values()) + pk_values = "_".join(str(v) for v in self.context_pks.values()) output_filename = f"{self.document_code}_{pk_values}_{int(datetime.now().timestamp())}.docx" output_dir = os.path.join(static_folder, "contract") @@ -424,8 +686,6 @@ class DocumentGenerator: output_path = os.path.join(output_dir, output_filename) pdf_filename = output_filename.replace(".docx", ".pdf") - doc = Document(template_full_path) - for old_text, new_text in self.replacements.items(): replace_text_in_doc(doc, old_text, new_text) diff --git a/static/contract/1. Phiếu xác lập thỏa thuận ca nhan.docx b/static/contract/1. Phiếu xác lập thỏa thuận ca nhan.docx new file mode 100644 index 00000000..9c06f3d0 Binary files /dev/null and b/static/contract/1. Phiếu xác lập thỏa thuận ca nhan.docx differ