#!/usr/bin/env python3
# LPR-OCR/detect_project.py
"""
License Plate Detection for Projects
Uses project-specific parameters and saves results to project output folder.
"""

import cv2
import numpy as np
import pytesseract
from PIL import Image
import os
import json
import re
import argparse
from pathlib import Path
from collections import defaultdict, Counter

class ProjectDetector:
    def __init__(self, project_dir):
        """Bind the detector to a project folder and load its parameters.

        Records the project's ``raw``, ``debug`` and ``output`` sub-folders,
        creates ``debug`` and ``output`` when they are missing (the project
        folder itself must already exist, because parents are not created),
        and stores the detection parameters in ``self.params``.

        Args:
            project_dir: Path (``str`` or ``Path``) of the project folder.
        """
        root = Path(project_dir)
        self.project_dir = root
        self.raw_dir, self.debug_dir, self.output_dir = (
            root / folder_name for folder_name in ('raw', 'debug', 'output')
        )

        # Only the folders this class writes to are created; 'raw' is read-only input.
        for writable_dir in (self.debug_dir, self.output_dir):
            writable_dir.mkdir(exist_ok=True)

        # Project-specific thresholds when available, built-in defaults otherwise.
        self.params = self.load_detection_parameters()

    def load_detection_parameters(self):
        """Load detection parameters from project annotations or use defaults.

        Reads ``<project_dir>/debug/detection_parameters.json`` when it exists.
        Values from the file override the built-in defaults key by key, so a
        file that lists only some thresholds still yields a complete parameter
        set (the contour filter indexes all eight keys unconditionally). A
        file that cannot be read, is not valid JSON, or does not contain a
        JSON object is reported and ignored in favour of the defaults.

        Returns:
            dict: ``min_width``, ``max_width``, ``min_height``, ``max_height``,
            ``min_aspect_ratio``, ``max_aspect_ratio``, ``min_area`` and
            ``max_area``, plus any additional keys present in the file.
        """
        # Deliberately permissive ("aggressive") limits: keep almost any blob.
        defaults = {
            'min_width': 30,
            'max_width': 1200,
            'min_height': 15,
            'max_height': 600,
            'min_aspect_ratio': 0.8,
            'max_aspect_ratio': 12.0,
            'min_area': 450,
            'max_area': 720000,
        }

        params_file = self.project_dir / 'debug' / 'detection_parameters.json'

        if not params_file.exists():
            print("⚠ Using default aggressive parameters (no annotations found)")
            return defaults

        try:
            with open(params_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print(f"⚠ Could not read {params_file} ({exc}); using default parameters")
            return defaults

        if not isinstance(loaded, dict):
            print(f"⚠ {params_file} does not contain a JSON object; using default parameters")
            return defaults

        print(f"✓ Loaded detection parameters from {params_file}")
        # File values win; defaults only fill in keys the file omits.
        return {**defaults, **loaded}

    def score_license_plate_likelihood(self, bbox, area, solidity, extent):
        """Rate a contour's geometry on a plate-likeness point scale.

        Points are awarded independently for bounding-box size, aspect ratio
        (width / height), contour area and two shape-quality measures, then
        summed. Every tier awards at least a few points, so the result is
        always positive.

        Args:
            bbox: ``(x, y, w, h)`` bounding box; only ``w`` and ``h`` are used.
            area: Contour area.
            solidity: Contour area divided by convex-hull area.
            extent: Contour area divided by bounding-box area.

        Returns:
            int: Sum of the awarded points.
        """
        _, _, width, height = bbox
        ratio = width / float(height)

        # Bounding-box size: tight plate-like range, looser range, anything else.
        if 50 <= width <= 600 and 20 <= height <= 200:
            size_points = 30
        elif 30 <= width <= 800 and 15 <= height <= 300:
            size_points = 20
        else:
            size_points = 5

        # Width-to-height ratio.
        if 1.5 <= ratio <= 6.0:
            ratio_points = 40
        elif 1.0 <= ratio <= 8.0:
            ratio_points = 25
        else:
            ratio_points = 10

        # Contour area.
        if 1000 <= area <= 120000:
            area_points = 20
        elif 500 <= area <= 200000:
            area_points = 15
        else:
            area_points = 5

        # Shape quality: each measure earns points on its own ...
        shape_points = 0
        if solidity > 0.3:
            shape_points += 15
        if extent > 0.3:
            shape_points += 15
        # ... and a contour that nearly fills its box and its hull earns extra.
        if solidity > 0.7 and 0.7 <= extent <= 1.0:
            shape_points += 20

        return size_points + ratio_points + area_points + shape_points

    def comprehensive_preprocessing(self, image):
        """Build the bank of grayscale variants that edge detection is run on.

        A 3-dimensional input is converted with ``cv2.COLOR_BGR2GRAY``; any
        other input is used unchanged as the grayscale base.

        Returns:
            list: ``(name, image)`` tuples in a fixed order: ``original``,
            ``clahe_2``, ``clahe_4``, ``hist_eq``, ``bilateral_9``,
            ``bilateral_15``, ``gaussian_3``, ``gaussian_5``,
            ``morph_close_3``, ``morph_open_5``, ``sharpened``, ``unsharp``,
            ``tophat``, ``blackhat``.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        def equalize_adaptive(clip_limit):
            # CLAHE on 8x8 tiles; only the clip limit differs between variants.
            return cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8,8)).apply(gray)

        rect3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        rect5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
        # Unsharp masking below is 1.5 * gray - 0.5 * (heavily blurred gray).
        heavy_blur = cv2.GaussianBlur(gray, (9, 9), 10.0)

        return [
            # Untouched grayscale base.
            ('original', gray),
            # Contrast enhancement.
            ('clahe_2', equalize_adaptive(2.0)),
            ('clahe_4', equalize_adaptive(4.0)),
            ('hist_eq', cv2.equalizeHist(gray)),
            # Smoothing.
            ('bilateral_9', cv2.bilateralFilter(gray, 9, 75, 75)),
            ('bilateral_15', cv2.bilateralFilter(gray, 15, 80, 80)),
            ('gaussian_3', cv2.GaussianBlur(gray, (3, 3), 0)),
            ('gaussian_5', cv2.GaussianBlur(gray, (5, 5), 0)),
            # Morphological close / open.
            ('morph_close_3', cv2.morphologyEx(gray, cv2.MORPH_CLOSE, rect3)),
            ('morph_open_5', cv2.morphologyEx(gray, cv2.MORPH_OPEN, rect5)),
            # Sharpening.
            ('sharpened', cv2.filter2D(gray, -1, sharpen_kernel)),
            ('unsharp', cv2.addWeighted(gray, 1.5, heavy_blur, -0.5, 0)),
            # Top-hat and black-hat (bottom-hat) filtering with the 5x5 kernel.
            ('tophat', cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, rect5)),
            ('blackhat', cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, rect5)),
        ]

    def comprehensive_edge_detection(self, image):
        """Run a bank of edge detectors over one preprocessed image.

        ``process_image`` calls this with each grayscale variant produced by
        ``comprehensive_preprocessing``.

        Gradient-magnitude maps (Sobel, Scharr) are rescaled so their maximum
        becomes 255; a completely flat image, whose maximum is 0, yields an
        all-zero map instead of dividing by zero. Laplacian responses are
        clipped to 0..255 before the uint8 conversion, because a plain
        ``np.uint8`` cast of larger values does not saturate.

        Returns:
            list: ``(name, edge_image)`` tuples: eight ``canny_<low>_<high>``
            maps followed by ``sobel_3``, ``sobel_5``, ``laplacian``, ``log``
            and ``scharr``.
        """
        def gradient_magnitude(dx, dy):
            """Combine two derivative maps into a uint8 magnitude map."""
            magnitude = np.sqrt(dx**2 + dy**2)
            peak = np.max(magnitude)
            if peak <= 0:
                # Flat input: no gradient anywhere, and nothing to rescale.
                return np.zeros(magnitude.shape, dtype=np.uint8)
            return np.uint8(magnitude * 255 / peak)

        def saturated_abs(response):
            """Absolute response clipped into the uint8 range."""
            return np.clip(np.absolute(response), 0, 255).astype(np.uint8)

        methods = []

        # Canny with several (low, high) hysteresis threshold pairs.
        canny_configs = [
            (20, 60), (30, 90), (40, 120), (50, 150),
            (60, 180), (80, 200), (100, 250), (30, 200)
        ]
        for low, high in canny_configs:
            methods.append((f'canny_{low}_{high}', cv2.Canny(image, low, high)))

        # Sobel gradient magnitude with 3x3 and 5x5 kernels.
        for ksize in (3, 5):
            sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=ksize)
            sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=ksize)
            methods.append((f'sobel_{ksize}', gradient_magnitude(sobel_x, sobel_y)))

        # Laplacian of the image itself.
        methods.append(('laplacian', saturated_abs(cv2.Laplacian(image, cv2.CV_64F))))

        # Laplacian of Gaussian: smooth first, then take the Laplacian.
        smoothed = cv2.GaussianBlur(image, (3, 3), 0)
        methods.append(('log', saturated_abs(cv2.Laplacian(smoothed, cv2.CV_64F))))

        # Scharr gradient magnitude.
        scharr_x = cv2.Scharr(image, cv2.CV_64F, 1, 0)
        scharr_y = cv2.Scharr(image, cv2.CV_64F, 0, 1)
        methods.append(('scharr', gradient_magnitude(scharr_x, scharr_y)))

        return methods

    def find_all_potential_plates(self, edge_image, method_name=""):
        """Collect every contour whose geometry passes the project thresholds.

        Contours are extracted with ``cv2.RETR_TREE`` and filtered on
        bounding-box width, height, aspect ratio and contour area using the
        limits in ``self.params``. Survivors are described and scored.

        Args:
            edge_image: Edge map to search for contours.
            method_name: Label stored with each candidate under ``'method'``.

        Returns:
            list[dict]: One dict per accepted contour with the keys
            ``contour``, ``bbox`` (``(x, y, w, h)``), ``aspect_ratio``,
            ``area``, ``solidity``, ``extent``, ``likelihood_score`` and
            ``method``.
        """
        contours, _ = cv2.findContours(edge_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        limits = self.params

        accepted = []
        for outline in contours:
            x, y, w, h = cv2.boundingRect(outline)
            ratio = w / float(h)
            area = cv2.contourArea(outline)

            # Intentionally loose gate; ranking happens later via the score.
            within_limits = (
                limits['min_width'] <= w <= limits['max_width'] and
                limits['min_height'] <= h <= limits['max_height'] and
                limits['min_aspect_ratio'] <= ratio <= limits['max_aspect_ratio'] and
                limits['min_area'] <= area <= limits['max_area']
            )
            if not within_limits:
                continue

            # Solidity: how much of the convex hull the contour fills.
            hull_area = cv2.contourArea(cv2.convexHull(outline))
            solidity = area / hull_area if hull_area > 0 else 0
            # Extent: how much of the bounding box the contour fills.
            extent = area / (w * h)

            box = (x, y, w, h)
            accepted.append({
                'contour': outline,
                'bbox': box,
                'aspect_ratio': ratio,
                'area': area,
                'solidity': solidity,
                'extent': extent,
                'likelihood_score': self.score_license_plate_likelihood(box, area, solidity, extent),
                'method': method_name
            })

        return accepted

    def save_comprehensive_visualization(self, image, candidates, method_name, top_n=20):
        """Draw the best-scoring candidates on a copy of the image and save it.

        The ``top_n`` candidates with the highest ``likelihood_score`` are
        outlined and labelled with rank and score; the first ten also get a
        ``<w>x<h>`` size label under the box. The first five are drawn with a
        thicker outline and larger font.

        Args:
            image: Image to annotate; a 2-D image is converted to BGR first.
            candidates: Candidate dicts with ``bbox`` and ``likelihood_score``.
            method_name: Used in the output name ``comprehensive_<method_name>.jpg``.
            top_n: Maximum number of candidates to draw.

        Returns:
            Path: Location of the written image inside ``self.debug_dir``.
        """
        is_grayscale = len(image.shape) == 2
        canvas = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) if is_grayscale else image.copy()

        # Twenty distinct BGR colours, reused cyclically when top_n exceeds twenty.
        palette = (
            (0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
            (0, 255, 255), (128, 255, 0), (255, 128, 0), (128, 0, 255), (0, 128, 255),
            (255, 255, 128), (255, 128, 255), (128, 255, 255), (192, 192, 0), (192, 0, 192),
            (0, 192, 192), (64, 255, 64), (255, 64, 64), (64, 64, 255), (128, 128, 128),
        )

        best_first = sorted(candidates, key=lambda entry: entry['likelihood_score'], reverse=True)

        for i, candidate in enumerate(best_first[:top_n]):
            x, y, w, h = candidate['bbox']
            color = palette[i % len(palette)]
            emphasized = i < 5

            # Outline of the candidate box.
            cv2.rectangle(vis_img := canvas, (x, y), (x + w, y + h), color, 3 if emphasized else 2) if False else \
                cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 3 if emphasized else 2)

            # Rank and score just above the box.
            label = f"#{i+1}:{candidate['likelihood_score']:.0f}"
            cv2.putText(canvas, label, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7 if emphasized else 0.5, color, 2)

            # Pixel size just below the box, for the first ten only.
            if i < 10:
                size_label = f"{w}x{h}"
                cv2.putText(canvas, size_label, (x, y + h + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

        output_path = self.debug_dir / f"comprehensive_{method_name}.jpg"
        cv2.imwrite(str(output_path), canvas)

        return output_path

    def extract_and_ocr_region(self, image, candidate, candidate_id):
        """Crop a candidate region and OCR it under many preprocessing/config pairs.

        The bounding box is padded, cropped from ``image`` and turned into
        several variants (thresholded, morphologically cleaned, contrast
        enhanced and, for small boxes, upscaled). Each variant is written to
        ``self.debug_dir`` and passed to Tesseract with each OCR configuration.
        Text is upper-cased and stripped to ``A-Z0-9``; results shorter than
        two characters are dropped.

        OCR stays best-effort: a failing attempt is skipped, but failures are
        counted and summarised in one printed warning per region so that a
        systematic problem (for example a missing Tesseract binary) is visible
        instead of silently producing no results.

        Args:
            image: Image to crop from. ``process_image`` passes the
                ``cv2.COLOR_BGR2GRAY`` conversion of the input image.
            candidate: Candidate dict; ``bbox`` and ``likelihood_score`` are read.
            candidate_id: Integer used in the debug file names
                (``region_<id>_<variant>.jpg``).

        Returns:
            list[dict]: One dict per accepted reading with the keys ``text``,
            ``confidence``, ``preprocessing``, ``config``, ``raw_text`` and
            ``candidate_score``. Empty when the crop is empty.
        """
        x, y, w, h = candidate['bbox']

        # Pad the box so characters touching the contour are not clipped,
        # while staying inside the image.
        padding = max(5, min(w, h) // 10)
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(image.shape[1], x + w + padding)
        y2 = min(image.shape[0], y + h + padding)

        region = image[y1:y2, x1:x2]

        if region.size == 0:
            return []

        # Save original region
        cv2.imwrite(str(self.debug_dir / f"region_{candidate_id:02d}_original.jpg"), region)

        # Multiple preprocessing approaches
        preprocessed = []

        # 1. Original
        preprocessed.append(('original', region))

        # 2. Global Otsu thresholding, normal and inverted polarity
        _, otsu = cv2.threshold(region, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        preprocessed.append(('otsu', otsu))

        _, inv_otsu = cv2.threshold(region, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        preprocessed.append(('inv_otsu', inv_otsu))

        # 3. Adaptive thresholding with different parameters
        if region.shape[0] > 10 and region.shape[1] > 10:
            adaptive1 = cv2.adaptiveThreshold(region, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
            preprocessed.append(('adaptive_11_2', adaptive1))

            if min(region.shape) > 20:
                adaptive2 = cv2.adaptiveThreshold(region, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 5)
                preprocessed.append(('adaptive_15_5', adaptive2))

        # 4. Morphological clean-up of the Otsu mask
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        if len(otsu.shape) == 2:
            morph_close = cv2.morphologyEx(otsu, cv2.MORPH_CLOSE, kernel)
            preprocessed.append(('morph_close', morph_close))

            morph_open = cv2.morphologyEx(otsu, cv2.MORPH_OPEN, kernel)
            preprocessed.append(('morph_open', morph_open))

        # 5. Contrast enhancement
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        enhanced = clahe.apply(region)
        preprocessed.append(('enhanced', enhanced))

        # 6. Upscale small boxes (at least 2x, and up to ~100x25 for the box).
        if w < 100 or h < 25:
            scale_factor = max(100 / max(w, 1), 25 / max(h, 1), 2.0)
            # Scale the crop's own dimensions: it includes the padding, so
            # using the bare box size here would distort the aspect ratio.
            region_h, region_w = region.shape[:2]
            target_size = (int(region_w * scale_factor), int(region_h * scale_factor))

            resized = cv2.resize(region, target_size, interpolation=cv2.INTER_CUBIC)
            preprocessed.append(('resized', resized))

            # Also resize thresholded versions
            resized_otsu = cv2.resize(otsu, target_size, interpolation=cv2.INTER_CUBIC)
            preprocessed.append(('resized_otsu', resized_otsu))

        # OCR configurations
        ocr_configs = [
            ('psm6', '--psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm7', '--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm8', '--psm 8 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm10', '--psm 10 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm13', '--psm 13'),
            ('default', ''),
            ('digits', '-c tessedit_char_whitelist=0123456789'),
            ('letters', '-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
        ]

        results = []
        failed_attempts = 0
        first_error = None

        for preprocess_name, processed_img in preprocessed:
            # Save preprocessed image
            cv2.imwrite(str(self.debug_dir / f"region_{candidate_id:02d}_{preprocess_name}.jpg"), processed_img)

            # The PIL conversion does not depend on the OCR config: do it once.
            try:
                pil_img = Image.fromarray(processed_img)
            except Exception as exc:  # best-effort: skip this variant only
                failed_attempts += len(ocr_configs)
                if first_error is None:
                    first_error = exc
                continue

            for config_name, config in ocr_configs:
                try:
                    text = pytesseract.image_to_string(pil_img, config=config).strip()
                except Exception as exc:  # best-effort: one bad attempt must not end the run
                    failed_attempts += 1
                    if first_error is None:
                        first_error = exc
                    continue

                # Keep only characters that can appear on a plate.
                clean_text = re.sub(r'[^A-Z0-9]', '', text.upper())

                if len(clean_text) >= 2:  # Very loose requirement
                    confidence = self.calculate_comprehensive_confidence(clean_text, candidate)

                    results.append({
                        'text': clean_text,
                        'confidence': confidence,
                        'preprocessing': preprocess_name,
                        'config': config_name,
                        'raw_text': text,
                        'candidate_score': candidate['likelihood_score']
                    })

        if failed_attempts:
            print(f"  ⚠ {failed_attempts} OCR attempt(s) failed for region {candidate_id} "
                  f"(first error: {first_error!r})")

        return results

    def calculate_comprehensive_confidence(self, text, candidate):
        """Score an OCR reading by how plate-like its text and region are.

        Points come from text length, letter/digit mix, a match against the
        layouts ``ABC1234``, ``1ABC234``, ``ABC123`` or ``AB1234``, 30% of the
        candidate's ``likelihood_score``, and character diversity (a penalty
        when fewer than half the characters are distinct, a bonus for three
        or more distinct characters).

        Args:
            text: Cleaned OCR text.
            candidate: Candidate dict; only ``likelihood_score`` is read.

        Returns:
            Non-negative number; ``0`` for empty or single-character text.
        """
        if not text or len(text) < 2:
            return 0

        n_chars = len(text)
        distinct = len(set(text))

        def fits(layout):
            # 'L' positions must be alphabetic, 'D' positions must be digits.
            return len(layout) == n_chars and all(
                ch.isalpha() if kind == 'L' else ch.isdigit()
                for ch, kind in zip(text, layout)
            )

        # Length tiers, from most to least plate-like.
        if 6 <= n_chars <= 8:
            total = 30
        elif 4 <= n_chars <= 9:
            total = 20
        elif 3 <= n_chars <= 10:
            total = 10
        else:
            total = 5

        # Letter/digit mix.
        letters_present = any(ch.isalpha() for ch in text)
        digits_present = any(ch.isdigit() for ch in text)
        if letters_present and digits_present:
            total += 25
        elif letters_present or digits_present:
            total += 15

        # Maryland-style layouts; at most one of them is rewarded.
        layout_points = (
            ('LLLDDDD', 30),  # ABC1234
            ('DLLLDDD', 25),  # 1ABC234
            ('LLLDDD', 25),   # ABC123
            ('LLDDDD', 20),   # AB1234
        )
        for layout, points in layout_points:
            if fits(layout):
                total += points
                break

        # Carry over part of the region's geometric score.
        total += candidate['likelihood_score'] * 0.3

        # Mostly-repeated characters look like OCR noise.
        if distinct < n_chars * 0.5:
            total -= 15

        # Reasonable character diversity.
        if distinct >= 3:
            total += 10

        return max(0, total)

    def remove_overlapping_candidates(self, candidates, overlap_threshold=0.3):
        """Remove overlapping candidates, keeping the highest scoring ones.

        Candidates are visited from highest to lowest ``likelihood_score``;
        one is kept only if its bounding box overlaps no already-kept box by
        more than ``overlap_threshold`` (intersection over union). The input
        list is left untouched: sorting happens on a copy.

        Args:
            candidates: Candidate dicts with ``bbox`` and ``likelihood_score``.
            overlap_threshold: Largest IoU two kept boxes may share.

        Returns:
            list[dict]: The kept candidates, best score first.
        """
        if not candidates:
            return []

        # sorted() is stable like list.sort(), so ties keep their input order,
        # but it does not reorder the caller's list as a side effect.
        ranked = sorted(candidates, key=lambda entry: entry['likelihood_score'], reverse=True)

        kept = []
        for candidate in ranked:
            overlaps_kept = any(
                self.calculate_overlap(candidate['bbox'], other['bbox']) > overlap_threshold
                for other in kept
            )
            if not overlaps_kept:
                kept.append(candidate)

        return kept

    def calculate_overlap(self, bbox1, bbox2):
        """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

        Returns ``0.0`` when the boxes do not intersect or when the union
        area is zero.
        """
        ax, ay, aw, ah = bbox1
        bx, by, bw, bh = bbox2

        # Signed extent of the shared rectangle along each axis.
        shared_w = min(ax + aw, bx + bw) - max(ax, bx)
        shared_h = min(ay + ah, by + bh) - max(ay, by)
        if shared_w < 0 or shared_h < 0:
            return 0.0

        intersection = shared_w * shared_h
        union = aw * ah + bw * bh - intersection
        if union > 0:
            return intersection / union
        return 0.0

    def process_image(self, image_path):
        """Process a single image with comprehensive detection.

        Pipeline:
          1. Load the image and build every preprocessing variant.
          2. Run every edge detector on every variant and collect the
             contours that pass the geometric filter.
          3. Drop candidates whose boxes overlap a better-scoring one
             (IoU threshold 0.2).
          4. OCR the 15 best remaining candidates on the grayscale image.
          5. Deduplicate readings by text, keeping the highest confidence.

        Intermediate images are written to ``self.debug_dir`` and progress is
        printed throughout.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            list[str]: Distinct OCR texts with confidence above 5, ordered by
            descending confidence. Empty when the image cannot be loaded, no
            candidate passes the filter, or OCR yields nothing.
        """
        print(f"\n=== PROCESSING: {Path(image_path).name} ===")

        # Load image (cv2.imread returns None instead of raising on failure)
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not load image: {image_path}")
            return []

        # Save original
        cv2.imwrite(str(self.debug_dir / f"00_original_{Path(image_path).stem}.jpg"), image)

        # Comprehensive preprocessing
        preprocessed_images = self.comprehensive_preprocessing(image)

        all_candidates = []
        method_count = 0

        print(f"Testing {len(preprocessed_images)} preprocessing methods...")

        for preprocess_name, preprocessed_img in preprocessed_images:
            print(f"\n  Preprocessing: {preprocess_name}")

            # Save preprocessed image
            cv2.imwrite(str(self.debug_dir / f"01_{preprocess_name}_{Path(image_path).stem}.jpg"), preprocessed_img)

            # Comprehensive edge detection
            edge_methods = self.comprehensive_edge_detection(preprocessed_img)

            for edge_name, edge_img in edge_methods:
                # One "method" is a (preprocessing, edge detector) pair.
                method_name = f"{preprocess_name}_{edge_name}"
                method_count += 1

                # Save edge image
                cv2.imwrite(str(self.debug_dir / f"02_{method_name}_{Path(image_path).stem}.jpg"), edge_img)

                # Find candidates
                candidates = self.find_all_potential_plates(edge_img, method_name)

                if candidates:
                    print(f"    {edge_name}: {len(candidates)} candidates")

                    # Add method info
                    for candidate in candidates:
                        candidate['full_method'] = method_name
                        candidate['preprocessing'] = preprocess_name
                        candidate['edge_detection'] = edge_name

                    all_candidates.extend(candidates)

                    # Save visualization for this method
                    # NOTE(review): the file name is built from method_name only
                    # (no image stem), so each processed image overwrites the
                    # per-method visualizations of the previous one.
                    if len(candidates) > 0:
                        self.save_comprehensive_visualization(image, candidates, method_name, top_n=10)

        print(f"\nProcessed {method_count} total method combinations")
        print(f"Found {len(all_candidates)} total candidates")

        if not all_candidates:
            print("No candidates found even with aggressive parameters!")
            return []

        # Remove overlapping candidates (the same region is usually found by many methods)
        unique_candidates = self.remove_overlapping_candidates(all_candidates, overlap_threshold=0.2)
        print(f"After removing overlaps: {len(unique_candidates)} unique candidates")

        # Sort by likelihood score
        unique_candidates.sort(key=lambda x: x['likelihood_score'], reverse=True)

        # Show top candidates
        print(f"\nTop 20 candidates by likelihood score:")
        for i, candidate in enumerate(unique_candidates[:20], 1):
            x, y, w, h = candidate['bbox']
            print(f"  {i:2d}. {w:3d}x{h:3d} at ({x:3d},{y:3d}) - Score: {candidate['likelihood_score']:5.1f} - AR: {candidate['aspect_ratio']:.2f} - {candidate['full_method']}")

        # Create overall visualization
        self.save_comprehensive_visualization(image, unique_candidates, f"all_methods_{Path(image_path).stem}", top_n=30)

        # Extract and OCR top candidates
        print(f"\nExtracting and OCR'ing top 15 candidates...")

        # OCR works on the grayscale version of the original image, not on
        # any of the preprocessing variants.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        all_ocr_results = []

        for i, candidate in enumerate(unique_candidates[:15]):
            print(f"\nCandidate {i+1}: {candidate['bbox'][2]}x{candidate['bbox'][3]} (score: {candidate['likelihood_score']:.1f})")

            # The 1-based rank doubles as the id used in the region debug file names.
            ocr_results = self.extract_and_ocr_region(gray, candidate, i+1)
            all_ocr_results.extend(ocr_results)

            # Print OCR results for this candidate
            for result in ocr_results:
                if result['confidence'] > 10:
                    print(f"  OCR: '{result['text']}' (conf: {result['confidence']:.1f}, {result['preprocessing']}+{result['config']})")

        # Sort all OCR results by confidence
        if all_ocr_results:
            # Remove duplicates, keeping highest confidence
            unique_ocr = {}
            for result in all_ocr_results:
                text = result['text']
                if text not in unique_ocr or result['confidence'] > unique_ocr[text]['confidence']:
                    unique_ocr[text] = result

            sorted_results = sorted(unique_ocr.values(), key=lambda x: x['confidence'], reverse=True)

            print(f"\n=== ALL OCR RESULTS (Top 20) ===")
            for i, result in enumerate(sorted_results[:20], 1):
                print(f"{i:2d}. '{result['text']}' (confidence: {result['confidence']:.1f}) - {result['preprocessing']}+{result['config']}")

            # Only the text is returned; confidences are used for ordering and the > 5 cut-off.
            return [r['text'] for r in sorted_results if r['confidence'] > 5]

        print("No valid OCR results found.")
        return []

    def analyze_project(self):
        """Analyze all images in the project's ``raw`` folder.

        Every regular file with a ``.jpg``, ``.jpeg``, ``.png`` or ``.bmp``
        suffix (case-insensitive) is processed, in sorted path order so that
        repeated runs visit the images, and write the results, in the same
        order. The combined analysis is then written to the output folder.

        Returns:
            dict: Image file name mapped to the list of OCR texts returned by
            ``process_image``. An empty dict when the folder holds no images,
            in which case no analysis files are written.
        """
        image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}

        # Path.glob yields entries in filesystem order; sort for reproducibility.
        image_files = sorted(
            entry for entry in self.raw_dir.glob('*')
            if entry.is_file() and entry.suffix.lower() in image_suffixes
        )

        if not image_files:
            print(f"No image files found in {self.raw_dir}")
            return {}

        print(f"=== PROJECT ANALYSIS: {len(image_files)} images ===")

        all_results = {
            image_file.name: self.process_image(image_file)
            for image_file in image_files
        }

        # Rank the readings across all images and write the report files.
        self.generate_comprehensive_analysis(all_results)

        return all_results

    def generate_comprehensive_analysis(self, all_results):
        """Rank OCR readings across images and write the report files.

        Each distinct text is scored with ``score_maryland_likelihood`` and
        receives 25 bonus points for every additional image it was read from.
        Two files are written to ``self.output_dir``:

        * ``comprehensive_results.json`` - project id, analysis timestamp
          (ISO 8601, local time with UTC offset), counts, the full ranking
          and the per-image results.
        * ``top_candidates.txt`` - a table of the 30 best texts followed by up
          to 15 recommendations with a total score of at least 50.

        Args:
            all_results: Mapping of image file name to the list of OCR texts
                found in that image.
        """
        # Local import: keeps this method self-contained with respect to the
        # module's import block.
        from datetime import datetime

        print(f"\n=== GENERATING COMPREHENSIVE ANALYSIS ===")

        # Single pass over the results: for every text, the images it came
        # from, in first-seen order. dict.fromkeys drops repeats within one
        # image so each image is counted at most once per text.
        sources_by_text = {}
        for image_name, texts in all_results.items():
            for text in dict.fromkeys(texts):
                sources_by_text.setdefault(text, []).append(image_name)

        # Collect all unique candidates with their scores
        all_unique_candidates = {}
        for text, sources in sources_by_text.items():
            base_score = self.score_maryland_likelihood(text)
            appearance_count = len(sources)
            # Readings confirmed by several images are more trustworthy.
            multi_image_bonus = (appearance_count - 1) * 25

            all_unique_candidates[text] = {
                'base_score': base_score,
                'multi_image_bonus': multi_image_bonus,
                'total_score': base_score + multi_image_bonus,
                'appearances': appearance_count,
                'sources': sources
            }

        # Sort by total score (stable: ties keep first-seen order)
        ranked_candidates = sorted(all_unique_candidates.items(), key=lambda x: x[1]['total_score'], reverse=True)
        high_score_candidates = [item for item in ranked_candidates if item[1]['total_score'] >= 50]

        # Save detailed results
        output_file = self.output_dir / 'comprehensive_results.json'
        output_data = {
            'project_id': self.project_dir.name,
            'analysis_date': datetime.now().astimezone().isoformat(timespec='seconds'),
            'total_images': len(all_results),
            'total_candidates': len(all_unique_candidates),
            'ranked_candidates': [
                {
                    'text': candidate,
                    'total_score': scores['total_score'],
                    'base_score': scores['base_score'],
                    'multi_image_bonus': scores['multi_image_bonus'],
                    'appearances': scores['appearances'],
                    'sources': scores['sources']
                }
                for candidate, scores in ranked_candidates
            ],
            'individual_results': all_results
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2)

        # Save top candidates for easy access
        top_candidates_file = self.output_dir / 'top_candidates.txt'
        with open(top_candidates_file, 'w', encoding='utf-8') as f:
            f.write("=== TOP LICENSE PLATE CANDIDATES ===\n\n")
            f.write("Rank  Candidate  Total  Base  Multi  Appears  Sources\n")
            f.write("----  ---------  -----  ----  -----  -------  -------\n")

            for i, (candidate, scores) in enumerate(ranked_candidates[:30], 1):
                # Source images are listed by name up to the first dot.
                sources = '+'.join([s.split('.')[0] for s in scores['sources']])
                f.write(f"{i:3d}.  {candidate:9s}  {scores['total_score']:3.0f}   {scores['base_score']:3.0f}   {scores['multi_image_bonus']:3.0f}   {scores['appearances']:7d}   {sources}\n")

            f.write(f"\n=== RECOMMENDATIONS ===\n")
            f.write("Start Maryland DMV search with these high-scoring candidates:\n\n")

            for i, (candidate, scores) in enumerate(high_score_candidates[:15], 1):
                confidence_level = "HIGH" if scores['total_score'] >= 80 else "MEDIUM" if scores['total_score'] >= 60 else "GOOD"
                multi_img = " (BOTH IMAGES)" if scores['appearances'] > 1 else ""
                f.write(f"  {i:2d}. {candidate:8s} (Score: {scores['total_score']:3.0f}, {confidence_level}){multi_img}\n")

        print(f"✓ Analysis complete!")
        print(f"✓ Detailed results saved to: {output_file}")
        print(f"✓ Top candidates saved to: {top_candidates_file}")
        print(f"✓ Found {len(all_unique_candidates)} unique candidates")
        print(f"✓ {len(high_score_candidates)} high-confidence candidates")

    def score_maryland_likelihood(self, text):
        """Rate how closely a text resembles a Maryland licence plate.

        Points depend on length (7 and 6 characters score highest), the
        letter/digit mix, a match against the layouts ``ABC1234``,
        ``1ABC234``, ``ABC123`` or ``AB1234``, and character diversity.
        Very short texts, single-character repeats and a small list of known
        noise strings are penalised.

        Args:
            text: Candidate plate text.

        Returns:
            int: Non-negative score; ``0`` for empty or single-character text.
        """
        if not text or len(text) < 2:
            return 0

        n_chars = len(text)
        distinct = len(set(text))

        def fits(layout):
            # 'L' positions must be alphabetic, 'D' positions must be digits.
            return len(layout) == n_chars and all(
                ch.isalpha() if kind == 'L' else ch.isdigit()
                for ch, kind in zip(text, layout)
            )

        # Length: Maryland plates are typically 6-7 characters.
        total = {7: 40, 6: 35, 5: 20, 4: 15, 8: 10}.get(n_chars, 5)

        # Letter/digit mix.
        letters_present = any(ch.isalpha() for ch in text)
        digits_present = any(ch.isdigit() for ch in text)
        if letters_present and digits_present:
            total += 30
        elif letters_present or digits_present:
            total += 15

        # Maryland-specific layouts; at most one of them is rewarded.
        layout_points = (
            ('LLLDDDD', 50),  # ABC1234 - most common Maryland format
            ('DLLLDDD', 40),  # 1ABC234 - also common
            ('LLLDDD', 40),   # ABC123
            ('LLDDDD', 30),   # AB1234
        )
        for layout, points in layout_points:
            if fits(layout):
                total += points
                break

        # Very short results and single-character repeats are unlikely plates.
        if n_chars <= 2:
            total -= 20
        if distinct == 1:
            total -= 30

        # Realistic character diversity.
        if distinct >= 4:
            total += 15
        elif distinct >= 3:
            total += 10

        # Strings that OCR commonly produces from noise.
        if text in ('SSS', 'EEE', 'AAA', 'OOO', '111', '000'):
            total -= 40

        # One or two letters, or one or two digits, on their own.
        if any(re.match(pattern, text) for pattern in (r'^[A-Z]{1,2}$', r'^[0-9]{1,2}$')):
            total -= 20

        return max(0, total)

def main():
    """Command-line entry point.

    ``--project-id`` (required integer) selects ``projects/<id>`` with the id
    zero-padded to three digits. With ``--image`` only that file from the
    project's ``raw`` folder is processed and up to ten of its readings are
    printed; without it the whole project is analysed. A missing project or
    image is reported and the function returns without doing any work.
    """
    parser = argparse.ArgumentParser(description='License Plate Detection for Projects')
    parser.add_argument('--project-id', type=int, required=True, help='Project ID')
    parser.add_argument('--image', help='Specific image to process')
    args = parser.parse_args()

    project_dir = Path(f"projects/{args.project_id:03d}")
    if not project_dir.exists():
        print(f"Project {args.project_id:03d} does not exist. Create it first.")
        return

    detector = ProjectDetector(project_dir)

    # No specific image requested: analyse the entire project.
    if not args.image:
        detector.analyze_project()
        return

    # Single-image mode.
    image_path = project_dir / 'raw' / args.image
    if not image_path.exists():
        print(f"Image {args.image} not found in project {args.project_id:03d}")
        return

    results = detector.process_image(image_path)
    print(f"\nResults for {args.image}: {len(results)} candidates")
    for i, result in enumerate(results[:10], 1):
        print(f"  {i}. {result}")


if __name__ == '__main__':
    main()