#!/usr/bin/env python3
"""
folder_watcher.py — Monitors a directory for new job folders and triggers QR merge.

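Trigger conditions (all 3 must be present, as .pdf files, in a new subfolder;
names are matched case-insensitively and files with "generated" in the name are ignored):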
  - A file with "shop" or "Shop" in the name         → production drawing URL (right QR)
  - A file with "submittal" or "Submittal" in the name → assembly/submittal URL (left QR)
  - A file with "carrier", "Carrier", "carrierqr", or "CarrierQR" in the name → blank carrier PDF

When all 3 are found the script:
  1. Reads the carrier file as the blank PDF template.
  2. Constructs public URLs for the shop and submittal files.
  3. Calls build_carrier_pdf() to generate the QR-coded PDF.
  4. Saves the result as "<folder_name> CarrierQR generated.pdf" in the same folder.

Usage:
    python folder_watcher.py [--watch-dir /var/www/html/qr] [--base-url http://fileshare.icastinc.com/qr]

On startup, any existing subfolders that are missing their generated PDF are processed
immediately so no jobs are missed if the watcher was offline.

Install as a systemd service using the provided qr-watcher.service unit file.

Requirements (in addition to qr_merge.py deps):
    pip install watchdog
"""

import argparse
import logging
import re
import sys
import threading
import time
from pathlib import Path
from urllib.parse import quote

try:
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler
except ImportError:
    sys.exit("Missing dependency: watchdog\nRun: pip install watchdog")

try:
    from qr_merge import build_carrier_pdf
except ImportError as e:
    sys.exit(f"Missing dependency: {e}\nRun: pip install Pillow qrcode pypdf")


# File name patterns — case-insensitive substring matches. They are applied with
# .search(), so the keyword may appear anywhere in the file name (for example,
# a name containing "workshop" also matches RE_SHOP).
RE_CARRIER = re.compile(r"carrier", re.IGNORECASE)   # matches carrier, Carrier, carrierqr, CarrierQR
RE_SUBMITTAL = re.compile(r"submittal", re.IGNORECASE)   # assembly/submittal file (left QR)
RE_SHOP = re.compile(r"shop", re.IGNORECASE)   # production/shop drawing file (right QR)

# Seconds to wait after the last filesystem event before processing a folder.
# Gives time for all files to finish uploading before the merge runs; every new
# event for the same folder restarts the wait.
DEBOUNCE_SECONDS = 3.0


# ---------------------------------------------------------------------------
# File classification
# ---------------------------------------------------------------------------

def classify_files(folder: Path):
    """
    Locate the carrier, submittal and shop PDFs inside a job folder.

    Entries are visited in sorted order and each PDF is assigned to the first
    category whose pattern matches its name, tested in the order
    carrier → submittal → shop, so a name containing several keywords is
    claimed by the highest-priority category only.  Within a category the
    first match (in sorted order) wins; later matches are ignored.

    Non-PDF entries, directories, and any file with "generated" in its name
    (a previous output of this tool) are not considered.

    Returns a (carrier_path, submittal_path, shop_path) tuple, or None when at
    least one of the three categories has no matching file.
    """
    # Order matters: it encodes the carrier > submittal > shop priority.
    categories = (
        ("carrier", RE_CARRIER),
        ("submittal", RE_SUBMITTAL),
        ("shop", RE_SHOP),
    )
    found = {}

    for entry in sorted(folder.iterdir()):
        if not (entry.is_file() and entry.suffix.lower() == ".pdf"):
            continue
        filename = entry.name
        if "generated" in filename.lower():
            continue  # prior output, never an input

        for kind, pattern in categories:
            if pattern.search(filename):
                # Keep only the first file seen for each category.
                found.setdefault(kind, entry)
                break

    if len(found) < len(categories):
        return None
    return found["carrier"], found["submittal"], found["shop"]


def output_exists(folder: Path) -> bool:
    """
    Tell whether the folder already holds a generated output PDF.

    An entry counts when it is a regular file, has a .pdf suffix (any case)
    and its name contains "generated" (any case).
    """
    return any(
        entry.is_file()
        and "generated" in entry.name.lower()
        and entry.suffix.lower() == ".pdf"
        for entry in folder.iterdir()
    )


# ---------------------------------------------------------------------------
# Core processing
# ---------------------------------------------------------------------------

def process_folder(folder: Path, base_url: str, logger: logging.Logger) -> None:
    """
    Check whether folder is ready to process. If so, run the QR merge and
    write the output PDF.  Idempotent — skips if output already exists.

    folder   -- job folder to inspect (a direct child of the watch directory).
    base_url -- public URL that maps to the watch directory; the folder name
                and file names are percent-encoded and appended to it.
    logger   -- destination for progress and error messages.

    Failures while inspecting the folder, building the PDF or writing the
    result are logged and swallowed so that a single bad job cannot take down
    the caller.  The output is written to a temporary file and then renamed
    into place, so a failed or interrupted write never leaves a truncated
    "generated" PDF behind (which would make later runs skip the folder).
    """
    folder_name = folder.name

    if not folder.is_dir():
        logger.debug("Folder no longer exists, skipping: %s", folder_name)
        return

    # The folder can disappear or be unreadable between the is_dir() check
    # above and the directory scans below, so treat OSError as "not ready".
    try:
        if output_exists(folder):
            logger.debug("Skipping %s — generated output already exists", folder_name)
            return
        result = classify_files(folder)
    except OSError as exc:
        logger.warning("Could not inspect %s, skipping: %s", folder_name, exc)
        return

    if not result:
        logger.debug(
            "Skipping %s — waiting for all 3 file types (carrier, submittal, shop)",
            folder_name,
        )
        return

    carrier_path, submittal_path, shop_path = result

    # Construct public-facing URLs for the submittal and shop PDFs
    folder_url = f"{base_url.rstrip('/')}/{quote(folder_name)}"
    submittal_url = f"{folder_url}/{quote(submittal_path.name)}"
    shop_url = f"{folder_url}/{quote(shop_path.name)}"

    logger.info(
        "Processing folder: %s\n"
        "  carrier (blank):  %s\n"
        "  submittal URL:    %s\n"
        "  shop URL:         %s",
        folder_name,
        carrier_path.name,
        submittal_url,
        shop_url,
    )

    try:
        blank_bytes = carrier_path.read_bytes()
        result_bytes = build_carrier_pdf(blank_bytes, submittal_url, shop_url)
    except Exception as exc:
        logger.error("QR merge failed for %s: %s", folder_name, exc, exc_info=True)
        return

    output_path = folder / f"{folder_name} CarrierQR generated.pdf"
    # The ".part" suffix keeps the temporary file from being mistaken for an
    # input or a finished output: both scans only consider ".pdf" suffixes.
    tmp_path = output_path.with_name(output_path.name + ".part")
    try:
        tmp_path.write_bytes(result_bytes)
        tmp_path.replace(output_path)
    except OSError as exc:
        logger.error(
            "Could not write output for %s: %s", folder_name, exc, exc_info=True
        )
        # Best-effort cleanup of the partial temporary file.
        try:
            tmp_path.unlink()
        except OSError as cleanup_exc:
            logger.debug("Could not remove %s: %s", tmp_path, cleanup_exc)
        return

    logger.info("Saved: %s", output_path)


# ---------------------------------------------------------------------------
# Filesystem event handler
# ---------------------------------------------------------------------------

class JobFolderHandler(FileSystemEventHandler):
    """
    Watches the root qr/ directory (recursively).

    - New direct subdirectory → schedule a check for it.
    - New / modified / moved file inside a direct subdirectory → schedule a
      check for that subdirectory.

    Checks are debounced so that a burst of file-creation events (e.g. all 3
    files uploaded quickly in sequence) results in only one processing run.

    A timer removes itself from the pending table when it fires, so the table
    only ever holds timers that are still waiting, and any exception raised
    while processing a folder is reported through the logger.
    """

    def __init__(self, watch_dir: Path, base_url: str, logger: logging.Logger):
        super().__init__()
        self.watch_dir = watch_dir
        self.base_url = base_url
        self.logger = logger
        # Pending debounce timers keyed by str(job folder); guarded by _lock.
        self._timers: dict[str, threading.Timer] = {}
        self._lock = threading.Lock()

    def _run_check(self, folder: Path, key: str) -> None:
        """
        Timer callback: drop this timer from the pending table, then process
        the folder, logging (rather than propagating) any unexpected error.
        """
        with self._lock:
            # This runs on the Timer's own thread, so current_thread() is the
            # timer object.  Only remove the entry if it has not already been
            # replaced by a newer timer for the same folder.
            if self._timers.get(key) is threading.current_thread():
                del self._timers[key]
        try:
            process_folder(folder, self.base_url, self.logger)
        except Exception:
            self.logger.exception("Unhandled error while processing %s", folder.name)

    def _schedule_check(self, folder: Path) -> None:
        """Cancel any pending timer for this folder and start a fresh debounce timer."""
        key = str(folder)
        with self._lock:
            existing = self._timers.get(key)
            if existing is not None:
                existing.cancel()
            t = threading.Timer(
                DEBOUNCE_SECONDS,
                self._run_check,
                args=(folder, key),
            )
            self._timers[key] = t
        t.start()

    def _resolve_job_folder(self, path: Path) -> Path | None:
        """
        Given an arbitrary path inside the watch dir, return the direct child
        subdirectory of watch_dir that contains it, or None if the path is the
        watch_dir itself (or not a descendant).

        A path that is a direct child of watch_dir but not a directory (a
        loose file in the root) also yields None.
        """
        try:
            rel = path.relative_to(self.watch_dir)
        except ValueError:
            return None
        parts = rel.parts
        if len(parts) == 0:
            return None
        candidate = self.watch_dir / parts[0]
        if candidate == path and path.is_dir():
            return candidate          # it IS the direct subdirectory
        if candidate.is_dir() and len(parts) > 1:
            return candidate          # it is a file/dir inside the subdirectory
        return None

    def on_created(self, event):
        """Schedule a check when a job folder, or anything inside one, is created."""
        path = Path(event.src_path)
        folder = self._resolve_job_folder(path)
        if folder:
            if event.is_directory and folder == path:
                self.logger.info("New job folder detected: %s", path.name)
            self._schedule_check(folder)

    def on_modified(self, event):
        """Schedule a check when a file inside a job folder is modified."""
        # Directory modification events are ignored; only file changes count.
        if not event.is_directory:
            path = Path(event.src_path)
            folder = self._resolve_job_folder(path)
            if folder:
                self._schedule_check(folder)

    def on_moved(self, event):
        """Schedule a check for the job folder that a move/rename landed in."""
        # Only the destination matters: that is where a new input may have appeared.
        dest = Path(event.dest_path)
        folder = self._resolve_job_folder(dest)
        if folder:
            self._schedule_check(folder)


# ---------------------------------------------------------------------------
# Startup scan
# ---------------------------------------------------------------------------

def scan_existing(watch_dir: Path, base_url: str, logger: logging.Logger) -> None:
    """
    Process any existing subfolders that are missing their generated PDF.
    Runs synchronously at startup so jobs are not lost if the watcher was down.

    watch_dir -- root directory whose direct subdirectories are job folders.
    base_url  -- public URL corresponding to watch_dir, passed to process_folder.
    logger    -- destination for progress and error messages.

    Each subfolder is handled independently: an unexpected error in one of
    them is logged and the scan moves on, so a single unreadable or broken
    job folder cannot abort the scan.
    """
    logger.info("Scanning existing folders in %s …", watch_dir)
    checked = 0
    for entry in sorted(watch_dir.iterdir()):
        if not entry.is_dir():
            continue
        checked += 1
        try:
            process_folder(entry, base_url, logger)
        except Exception:
            logger.exception("Startup scan failed for %s; continuing", entry.name)
    logger.info("Startup scan complete (%d subfolder(s) checked)", checked)


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """
    Command-line entry point: parse options, run the startup scan, then watch
    the directory until interrupted.

    Exits with status 1 if the watch directory does not exist.  Both Ctrl+C
    (SIGINT) and SIGTERM — the signal a service manager such as systemd sends
    to stop a service — lead to the same shutdown path, which stops and joins
    the observer and logs "Watcher stopped".
    """
    # Imported here because only this entry point needs it.
    import signal

    parser = argparse.ArgumentParser(
        description="Watch for QR merge job folders and auto-generate carrier PDFs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--watch-dir",
        default="/var/www/html/qr",
        help="Root directory to monitor for new job subfolders",
    )
    parser.add_argument(
        "--base-url",
        default="http://fileshare.icastinc.com/qr",
        help="Public base URL corresponding to --watch-dir",
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging verbosity",
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=args.log_level,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S",
        stream=sys.stdout,
    )
    logger = logging.getLogger("qr_watcher")

    watch_dir = Path(args.watch_dir)
    if not watch_dir.is_dir():
        logger.error("Watch directory does not exist: %s", watch_dir)
        sys.exit(1)

    # Catch up on folders that became ready while the watcher was not running.
    scan_existing(watch_dir, args.base_url, logger)

    handler = JobFolderHandler(watch_dir, args.base_url, logger)
    observer = Observer()
    observer.schedule(handler, str(watch_dir), recursive=True)
    observer.start()
    logger.info("Watching %s (press Ctrl+C to stop)", watch_dir)

    def _terminate(signum, frame):
        # Re-use the Ctrl+C path so the finally block below runs on SIGTERM too.
        raise KeyboardInterrupt

    signal.signal(signal.SIGTERM, _terminate)

    try:
        # The observer works on its own thread; the main thread just idles
        # until it is interrupted.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        observer.stop()
        observer.join()
        logger.info("Watcher stopped")

# Start the watcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
