#!/usr/bin/env python3
"""
License Plate Detection for Projects

Uses project-specific parameters and saves results to project output folder.
"""

import argparse
import json
import os
import re
from collections import defaultdict, Counter
from datetime import datetime
from pathlib import Path

import cv2
import numpy as np
import pytesseract
from PIL import Image

# Fallback contour filters used when a project has no saved parameters.
# Loaded parameter files are merged over these, so a partial file still
# yields every key the detector reads.
DEFAULT_DETECTION_PARAMS = {
    'min_width': 30,
    'max_width': 1200,
    'min_height': 15,
    'max_height': 600,
    'min_aspect_ratio': 0.8,
    'max_aspect_ratio': 12.0,
    'min_area': 450,
    'max_area': 720000,
}

# File suffixes (lower-case) treated as images by analyze_project().
IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp')

# Character whitelist handed to Tesseract for plate-like text.
PLATE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'


class ProjectDetector:
    """Detect and OCR license-plate candidates for the images of one project.

    A project directory is expected to contain a ``raw`` folder with the
    input images. Intermediate images are written to ``debug`` and the
    final reports to ``output``; both folders are created on construction.
    """

    def __init__(self, project_dir):
        """Set up the project folders and load the detection parameters.

        Args:
            project_dir: Path (str or Path) of the project directory.
        """
        self.project_dir = Path(project_dir)
        self.raw_dir = self.project_dir / 'raw'
        self.debug_dir = self.project_dir / 'debug'
        self.output_dir = self.project_dir / 'output'

        # Ensure directories exist (parents=True so a not-yet-created
        # project directory does not make construction fail).
        self.debug_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Load project parameters if available
        self.params = self.load_detection_parameters()

    def load_detection_parameters(self):
        """Load detection parameters from the project or fall back to defaults.

        Reads ``debug/detection_parameters.json`` when it exists. Values in
        that file override DEFAULT_DETECTION_PARAMS; keys missing from the
        file keep their default so later lookups cannot raise KeyError.

        Returns:
            dict with the min/max width, height, aspect-ratio and area limits.
        """
        params_file = self.project_dir / 'debug' / 'detection_parameters.json'

        if params_file.exists():
            with open(params_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            print(f"✓ Loaded detection parameters from {params_file}")
            return {**DEFAULT_DETECTION_PARAMS, **loaded}

        # Use aggressive default parameters
        print("⚠ Using default aggressive parameters (no annotations found)")
        return dict(DEFAULT_DETECTION_PARAMS)

    def score_license_plate_likelihood(self, bbox, area, solidity, extent):
        """Score how likely this region is to be a license plate.

        Args:
            bbox: ``(x, y, w, h)`` bounding box of the region.
            area: Contour area.
            solidity: Contour area divided by its convex-hull area.
            extent: Contour area divided by the bounding-box area.

        Returns:
            Additive heuristic score; higher means more plate-like.
        """
        x, y, w, h = bbox
        # A degenerate zero-height box gets ratio 0 and therefore the lowest
        # aspect-ratio score instead of raising ZeroDivisionError.
        aspect_ratio = w / float(h) if h > 0 else 0.0

        score = 0

        # Size scoring - prefer license plate-like sizes
        if 50 <= w <= 600 and 20 <= h <= 200:
            score += 30
        elif 30 <= w <= 800 and 15 <= h <= 300:
            score += 20
        else:
            score += 5

        # Aspect ratio scoring
        if 1.5 <= aspect_ratio <= 6.0:
            score += 40
        elif 1.0 <= aspect_ratio <= 8.0:
            score += 25
        else:
            score += 10

        # Area scoring
        if 1000 <= area <= 120000:
            score += 20
        elif 500 <= area <= 200000:
            score += 15
        else:
            score += 5

        # Geometric quality
        if solidity > 0.3:
            score += 15
        if extent > 0.3:
            score += 15

        # Bonus for rectangular shapes
        if 0.7 <= extent <= 1.0 and solidity > 0.7:
            score += 20

        return score

    def comprehensive_preprocessing(self, image):
        """Apply comprehensive preprocessing to maximize detection.

        Args:
            image: Input image; a 3-dimensional array is converted from BGR
                to grayscale, anything else is used as-is.

        Returns:
            List of ``(name, image)`` tuples, one per preprocessing variant.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        methods = []

        # 1. Original
        methods.append(('original', gray))

        # 2. Multiple contrast enhancements
        clahe1 = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        methods.append(('clahe_2', clahe1.apply(gray)))

        clahe2 = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        methods.append(('clahe_4', clahe2.apply(gray)))

        # 3. Histogram equalization
        methods.append(('hist_eq', cv2.equalizeHist(gray)))

        # 4. Multiple bilateral filters
        methods.append(('bilateral_9', cv2.bilateralFilter(gray, 9, 75, 75)))
        methods.append(('bilateral_15', cv2.bilateralFilter(gray, 15, 80, 80)))

        # 5. Gaussian blurs
        methods.append(('gaussian_3', cv2.GaussianBlur(gray, (3, 3), 0)))
        methods.append(('gaussian_5', cv2.GaussianBlur(gray, (5, 5), 0)))

        # 6. Morphological operations
        kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        methods.append(
            ('morph_close_3', cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel3))
        )

        kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        methods.append(
            ('morph_open_5', cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel5))
        )

        # 7. Sharpening
        sharpening_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        methods.append(('sharpened', cv2.filter2D(gray, -1, sharpening_kernel)))

        # 8. Unsharp masking
        gaussian_blur = cv2.GaussianBlur(gray, (9, 9), 10.0)
        unsharp = cv2.addWeighted(gray, 1.5, gaussian_blur, -0.5, 0)
        methods.append(('unsharp', unsharp))

        # 9. Top-hat filtering
        methods.append(
            ('tophat', cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel5))
        )

        # 10. Bottom-hat filtering
        methods.append(
            ('blackhat', cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel5))
        )

        return methods

    @staticmethod
    def _gradient_magnitude_uint8(grad_x, grad_y):
        """Combine two gradient images into a 0-255 uint8 magnitude image."""
        magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        peak = np.max(magnitude)
        if peak <= 0:
            # Completely flat input: dividing by the zero peak would produce
            # NaNs, so report "no edges" explicitly.
            return np.zeros(magnitude.shape, dtype=np.uint8)
        return np.uint8(magnitude * 255 / peak)

    def comprehensive_edge_detection(self, image):
        """Apply comprehensive edge detection methods.

        Args:
            image: Single-channel image, as produced by
                comprehensive_preprocessing().

        Returns:
            List of ``(name, edge_image)`` tuples with uint8 edge images.
        """
        methods = []

        # Multiple Canny thresholds
        canny_configs = [
            (20, 60), (30, 90), (40, 120), (50, 150),
            (60, 180), (80, 200), (100, 250), (30, 200),
        ]
        for low, high in canny_configs:
            methods.append((f'canny_{low}_{high}', cv2.Canny(image, low, high)))

        # Sobel edges
        sobelx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)
        methods.append(('sobel_3', self._gradient_magnitude_uint8(sobelx, sobely)))

        # Sobel with different kernel sizes
        sobelx5 = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
        sobely5 = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)
        methods.append(('sobel_5', self._gradient_magnitude_uint8(sobelx5, sobely5)))

        # Laplacian. convertScaleAbs takes the absolute value and saturates
        # at 255; a plain uint8 cast would wrap strong responses around to
        # small values.
        laplacian = cv2.Laplacian(image, cv2.CV_64F)
        methods.append(('laplacian', cv2.convertScaleAbs(laplacian)))

        # Laplacian of Gaussian
        gaussian = cv2.GaussianBlur(image, (3, 3), 0)
        log = cv2.Laplacian(gaussian, cv2.CV_64F)
        methods.append(('log', cv2.convertScaleAbs(log)))

        # Scharr edges
        scharrx = cv2.Scharr(image, cv2.CV_64F, 1, 0)
        scharry = cv2.Scharr(image, cv2.CV_64F, 0, 1)
        methods.append(('scharr', self._gradient_magnitude_uint8(scharrx, scharry)))

        return methods

    def find_all_potential_plates(self, edge_image, method_name=""):
        """Find ALL potential license plate regions with very loose filtering.

        Args:
            edge_image: uint8 edge image to extract contours from.
            method_name: Label stored in each candidate's ``'method'`` field.

        Returns:
            List of candidate dicts (contour, bbox, aspect_ratio, area,
            solidity, extent, likelihood_score, method) for every contour
            that passes the limits in ``self.params``.
        """
        contours, _ = cv2.findContours(
            edge_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        params = self.params
        candidates = []

        for contour in contours:
            # Basic measurements
            x, y, w, h = cv2.boundingRect(contour)
            aspect_ratio = w / float(h)
            area = cv2.contourArea(contour)

            # Very loose filtering
            passes_filter = (
                params['min_width'] <= w <= params['max_width']
                and params['min_height'] <= h <= params['max_height']
                and params['min_aspect_ratio'] <= aspect_ratio <= params['max_aspect_ratio']
                and params['min_area'] <= area <= params['max_area']
            )
            if not passes_filter:
                continue

            # Geometric quality
            hull = cv2.convexHull(contour)
            hull_area = cv2.contourArea(hull)
            solidity = area / hull_area if hull_area > 0 else 0
            extent = area / (w * h)

            # Calculate likelihood score
            likelihood_score = self.score_license_plate_likelihood(
                (x, y, w, h), area, solidity, extent
            )

            candidates.append({
                'contour': contour,
                'bbox': (x, y, w, h),
                'aspect_ratio': aspect_ratio,
                'area': area,
                'solidity': solidity,
                'extent': extent,
                'likelihood_score': likelihood_score,
                'method': method_name,
            })

        return candidates

    def save_comprehensive_visualization(self, image, candidates, method_name, top_n=20):
        """Save visualization with many candidates.

        Draws the ``top_n`` highest-scoring candidates on a copy of ``image``
        and writes it to ``debug/comprehensive_<method_name>.jpg``.

        Returns:
            Path of the written image.
        """
        if len(image.shape) == 2:
            vis_img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        else:
            vis_img = image.copy()

        # Sort by likelihood score
        candidates_sorted = sorted(
            candidates, key=lambda c: c['likelihood_score'], reverse=True
        )

        colors = [
            (0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
            (0, 255, 255), (128, 255, 0), (255, 128, 0), (128, 0, 255), (0, 128, 255),
            (255, 255, 128), (255, 128, 255), (128, 255, 255), (192, 192, 0), (192, 0, 192),
            (0, 192, 192), (64, 255, 64), (255, 64, 64), (64, 64, 255), (128, 128, 128),
        ]

        for i, candidate in enumerate(candidates_sorted[:top_n]):
            x, y, w, h = candidate['bbox']
            color = colors[i % len(colors)]

            # Draw rectangle (the five best candidates are emphasised)
            thickness = 3 if i < 5 else 2
            cv2.rectangle(vis_img, (x, y), (x + w, y + h), color, thickness)

            # Add label
            label = f"#{i+1}:{candidate['likelihood_score']:.0f}"
            font_scale = 0.7 if i < 5 else 0.5
            cv2.putText(vis_img, label, (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)

            # Add size info for top candidates
            if i < 10:
                size_label = f"{w}x{h}"
                cv2.putText(vis_img, size_label, (x, y + h + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

        output_path = self.debug_dir / f"comprehensive_{method_name}.jpg"
        cv2.imwrite(str(output_path), vis_img)
        return output_path

    def extract_and_ocr_region(self, image, candidate, candidate_id):
        """Extract region and try comprehensive OCR.

        Args:
            image: Image to crop from. process_image() passes the grayscale
                image; the Otsu/adaptive thresholding and CLAHE calls below
                are applied to it directly.
            candidate: Candidate dict with ``'bbox'`` and ``'likelihood_score'``.
            candidate_id: Integer used in the debug file names.

        Returns:
            List of OCR result dicts (text, confidence, preprocessing, config,
            raw_text, candidate_score); empty when the crop is empty.

        Raises:
            pytesseract.TesseractNotFoundError: When the Tesseract binary is
                not available, since no OCR attempt can succeed in that case.
        """
        x, y, w, h = candidate['bbox']

        # Add padding
        padding = max(5, min(w, h) // 10)
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(image.shape[1], x + w + padding)
        y2 = min(image.shape[0], y + h + padding)

        region = image[y1:y2, x1:x2]
        if region.size == 0:
            return []

        # Save original region
        cv2.imwrite(
            str(self.debug_dir / f"region_{candidate_id:02d}_original.jpg"), region
        )

        # Multiple preprocessing approaches
        preprocessed = []

        # 1. Original
        preprocessed.append(('original', region))

        # 2. Multiple thresholding methods
        _, otsu = cv2.threshold(
            region, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )
        preprocessed.append(('otsu', otsu))

        _, inv_otsu = cv2.threshold(
            region, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
        )
        preprocessed.append(('inv_otsu', inv_otsu))

        # 3. Adaptive thresholding with different parameters
        if region.shape[0] > 10 and region.shape[1] > 10:
            adaptive1 = cv2.adaptiveThreshold(
                region, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2
            )
            preprocessed.append(('adaptive_11_2', adaptive1))

        if min(region.shape) > 20:
            adaptive2 = cv2.adaptiveThreshold(
                region, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                cv2.THRESH_BINARY, 15, 5
            )
            preprocessed.append(('adaptive_15_5', adaptive2))

        # 4. Morphological operations
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        if len(otsu.shape) == 2:
            morph_close = cv2.morphologyEx(otsu, cv2.MORPH_CLOSE, kernel)
            preprocessed.append(('morph_close', morph_close))

            morph_open = cv2.morphologyEx(otsu, cv2.MORPH_OPEN, kernel)
            preprocessed.append(('morph_open', morph_open))

        # 5. Contrast enhancement
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        preprocessed.append(('enhanced', clahe.apply(region)))

        # 6. Resize if too small
        if w < 100 or h < 25:
            scale_factor = max(100 / w, 25 / h, 2.0)
            new_w, new_h = int(w * scale_factor), int(h * scale_factor)
            resized = cv2.resize(
                region, (new_w, new_h), interpolation=cv2.INTER_CUBIC
            )
            preprocessed.append(('resized', resized))

            # Also resize thresholded versions
            resized_otsu = cv2.resize(
                otsu, (new_w, new_h), interpolation=cv2.INTER_CUBIC
            )
            preprocessed.append(('resized_otsu', resized_otsu))

        # OCR configurations
        ocr_configs = [
            ('psm6', f'--psm 6 -c tessedit_char_whitelist={PLATE_CHARS}'),
            ('psm7', f'--psm 7 -c tessedit_char_whitelist={PLATE_CHARS}'),
            ('psm8', f'--psm 8 -c tessedit_char_whitelist={PLATE_CHARS}'),
            ('psm10', f'--psm 10 -c tessedit_char_whitelist={PLATE_CHARS}'),
            ('psm13', '--psm 13'),
            ('default', ''),
            ('digits', '-c tessedit_char_whitelist=0123456789'),
            ('letters', '-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
        ]

        results = []

        for preprocess_name, processed_img in preprocessed:
            # Save preprocessed image
            cv2.imwrite(
                str(self.debug_dir / f"region_{candidate_id:02d}_{preprocess_name}.jpg"),
                processed_img,
            )

            # The PIL conversion does not depend on the OCR config, so do it
            # once per variant instead of once per (variant, config) pair.
            try:
                pil_img = Image.fromarray(processed_img)
            except (TypeError, ValueError) as exc:
                print(f"    ⚠ Could not convert '{preprocess_name}' for OCR: {exc}")
                continue

            for config_name, config in ocr_configs:
                try:
                    text = pytesseract.image_to_string(pil_img, config=config).strip()
                except pytesseract.TesseractNotFoundError:
                    # Without the binary every attempt fails; surfacing this
                    # beats silently reporting "no OCR results".
                    raise
                except Exception as exc:
                    # Best effort: one failing variant/config must not abort
                    # the remaining attempts, but the failure is reported.
                    print(f"    ⚠ OCR failed ({preprocess_name}+{config_name}): {exc}")
                    continue

                # Clean text
                clean_text = re.sub(r'[^A-Z0-9]', '', text.upper())

                if len(clean_text) >= 2:  # Very loose requirement
                    confidence = self.calculate_comprehensive_confidence(
                        clean_text, candidate
                    )
                    results.append({
                        'text': clean_text,
                        'confidence': confidence,
                        'preprocessing': preprocess_name,
                        'config': config_name,
                        'raw_text': text,
                        'candidate_score': candidate['likelihood_score'],
                    })

        return results

    def calculate_comprehensive_confidence(self, text, candidate):
        """Calculate confidence for any potential license plate text.

        Args:
            text: Cleaned OCR text (upper-case letters and digits).
            candidate: Candidate dict; its ``'likelihood_score'`` contributes
                30% of its value as a geometric bonus.

        Returns:
            Non-negative heuristic score; 0 for text shorter than 2 characters.
        """
        if not text or len(text) < 2:
            return 0

        score = 0
        length = len(text)

        # Length scoring
        if 6 <= length <= 8:
            score += 30
        elif 4 <= length <= 9:
            score += 20
        elif 3 <= length <= 10:
            score += 10
        else:
            score += 5

        # Character composition
        has_letter = any(c.isalpha() for c in text)
        has_number = any(c.isdigit() for c in text)

        if has_letter and has_number:
            score += 25
        elif has_letter or has_number:
            score += 15

        # Maryland-specific patterns
        if length == 7:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 30  # ABC1234
            elif text[0].isdigit() and text[1:4].isalpha() and text[4:].isdigit():
                score += 25  # 1ABC234
        elif length == 6:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 25  # ABC123
            elif text[:2].isalpha() and text[2:].isdigit():
                score += 20  # AB1234

        # Geometric bonus
        score += candidate['likelihood_score'] * 0.3

        # Penalize too many repeated characters
        unique_chars = len(set(text))
        if unique_chars < length * 0.5:
            score -= 15

        # Bonus for reasonable character diversity
        if unique_chars >= 3:
            score += 10

        return max(0, score)

    def remove_overlapping_candidates(self, candidates, overlap_threshold=0.3):
        """Remove overlapping candidates, keeping highest scoring ones.

        Args:
            candidates: List of candidate dicts; it is not modified.
            overlap_threshold: IoU above which a candidate counts as a
                duplicate of an already kept, higher-scoring one.

        Returns:
            New list of kept candidates, ordered by descending
            ``'likelihood_score'``.
        """
        if not candidates:
            return []

        # Sort a copy by likelihood score so the caller's list keeps its order.
        ranked = sorted(
            candidates, key=lambda c: c['likelihood_score'], reverse=True
        )

        unique = []
        for candidate in ranked:
            bbox = candidate['bbox']
            is_duplicate = any(
                self.calculate_overlap(bbox, kept['bbox']) > overlap_threshold
                for kept in unique
            )
            if not is_duplicate:
                unique.append(candidate)

        return unique

    def calculate_overlap(self, bbox1, bbox2):
        """Calculate intersection over union.

        Args:
            bbox1: First box as ``(x, y, w, h)``.
            bbox2: Second box as ``(x, y, w, h)``.

        Returns:
            IoU as a float; 0.0 when the boxes do not intersect.
        """
        x1, y1, w1, h1 = bbox1
        x2, y2, w2, h2 = bbox2

        x_left = max(x1, x2)
        y_top = max(y1, y2)
        x_right = min(x1 + w1, x2 + w2)
        y_bottom = min(y1 + h1, y2 + h2)

        if x_right < x_left or y_bottom < y_top:
            return 0.0

        intersection = (x_right - x_left) * (y_bottom - y_top)
        union = w1 * h1 + w2 * h2 - intersection

        return intersection / union if union > 0 else 0.0

    def process_image(self, image_path):
        """Process a single image with comprehensive detection.

        Runs every preprocessing/edge-detection combination, collects and
        de-duplicates candidates, OCRs the 15 best ones and writes debug
        images along the way.

        Args:
            image_path: Path of the image to process.

        Returns:
            List of OCR texts with confidence above 5, best first; an empty
            list when the image cannot be loaded or nothing is found.
        """
        image_path = Path(image_path)
        stem = image_path.stem

        print(f"\n=== PROCESSING: {image_path.name} ===")

        # Load image
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not load image: {image_path}")
            return []

        # Save original
        cv2.imwrite(str(self.debug_dir / f"00_original_{stem}.jpg"), image)

        # Comprehensive preprocessing
        preprocessed_images = self.comprehensive_preprocessing(image)

        all_candidates = []
        method_count = 0

        print(f"Testing {len(preprocessed_images)} preprocessing methods...")

        for preprocess_name, preprocessed_img in preprocessed_images:
            print(f"\n Preprocessing: {preprocess_name}")

            # Save preprocessed image
            cv2.imwrite(
                str(self.debug_dir / f"01_{preprocess_name}_{stem}.jpg"),
                preprocessed_img,
            )

            # Comprehensive edge detection
            edge_methods = self.comprehensive_edge_detection(preprocessed_img)

            for edge_name, edge_img in edge_methods:
                method_name = f"{preprocess_name}_{edge_name}"
                method_count += 1

                # Save edge image
                cv2.imwrite(
                    str(self.debug_dir / f"02_{method_name}_{stem}.jpg"), edge_img
                )

                # Find candidates
                candidates = self.find_all_potential_plates(edge_img, method_name)
                if not candidates:
                    continue

                print(f" {edge_name}: {len(candidates)} candidates")

                # Add method info
                for candidate in candidates:
                    candidate['full_method'] = method_name
                    candidate['preprocessing'] = preprocess_name
                    candidate['edge_detection'] = edge_name

                all_candidates.extend(candidates)

                # Save visualization for this method
                self.save_comprehensive_visualization(
                    image, candidates, method_name, top_n=10
                )

        print(f"\nProcessed {method_count} total method combinations")
        print(f"Found {len(all_candidates)} total candidates")

        if not all_candidates:
            print("No candidates found even with aggressive parameters!")
            return []

        # Remove overlapping candidates; the result is already ordered by
        # descending likelihood score.
        unique_candidates = self.remove_overlapping_candidates(
            all_candidates, overlap_threshold=0.2
        )
        print(f"After removing overlaps: {len(unique_candidates)} unique candidates")

        # Show top candidates
        print(f"\nTop 20 candidates by likelihood score:")
        for i, candidate in enumerate(unique_candidates[:20], 1):
            x, y, w, h = candidate['bbox']
            print(f" {i:2d}. {w:3d}x{h:3d} at ({x:3d},{y:3d}) - Score: {candidate['likelihood_score']:5.1f} - AR: {candidate['aspect_ratio']:.2f} - {candidate['full_method']}")

        # Create overall visualization
        self.save_comprehensive_visualization(
            image, unique_candidates, f"all_methods_{stem}", top_n=30
        )

        # Extract and OCR top candidates
        print(f"\nExtracting and OCR'ing top 15 candidates...")

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        all_ocr_results = []

        for i, candidate in enumerate(unique_candidates[:15]):
            print(f"\nCandidate {i+1}: {candidate['bbox'][2]}x{candidate['bbox'][3]} (score: {candidate['likelihood_score']:.1f})")

            ocr_results = self.extract_and_ocr_region(gray, candidate, i + 1)
            all_ocr_results.extend(ocr_results)

            # Print OCR results for this candidate
            for result in ocr_results:
                if result['confidence'] > 10:
                    print(f" OCR: '{result['text']}' (conf: {result['confidence']:.1f}, {result['preprocessing']}+{result['config']})")

        if not all_ocr_results:
            print("No valid OCR results found.")
            return []

        # Remove duplicates, keeping highest confidence
        unique_ocr = {}
        for result in all_ocr_results:
            text = result['text']
            best = unique_ocr.get(text)
            if best is None or result['confidence'] > best['confidence']:
                unique_ocr[text] = result

        # Sort all OCR results by confidence
        sorted_results = sorted(
            unique_ocr.values(), key=lambda r: r['confidence'], reverse=True
        )

        print(f"\n=== ALL OCR RESULTS (Top 20) ===")
        for i, result in enumerate(sorted_results[:20], 1):
            print(f"{i:2d}. '{result['text']}' (confidence: {result['confidence']:.1f}) - {result['preprocessing']}+{result['config']}")

        return [r['text'] for r in sorted_results if r['confidence'] > 5]

    def analyze_project(self):
        """Analyze all images in the project.

        Processes every ``.jpg``/``.jpeg``/``.png``/``.bmp`` file in the
        ``raw`` folder (in sorted order, so runs are reproducible) and writes
        the combined report via generate_comprehensive_analysis().

        Returns:
            Dict mapping image file name to its list of OCR texts, or None
            when the ``raw`` folder contains no image files.
        """
        # Get all images from raw directory
        image_files = sorted(
            f for f in self.raw_dir.glob('*')
            if f.suffix.lower() in IMAGE_SUFFIXES
        )

        if not image_files:
            print(f"No image files found in {self.raw_dir}")
            return

        print(f"=== PROJECT ANALYSIS: {len(image_files)} images ===")

        all_results = {}
        for image_file in image_files:
            all_results[image_file.name] = self.process_image(image_file)

        # Generate comprehensive analysis
        self.generate_comprehensive_analysis(all_results)

        return all_results

    def generate_comprehensive_analysis(self, all_results):
        """Generate comprehensive analysis and save to output folder.

        Writes ``output/comprehensive_results.json`` and
        ``output/top_candidates.txt``.

        Args:
            all_results: Dict mapping image file name to a list of candidate
                texts, as returned by analyze_project().
        """
        print(f"\n=== GENERATING COMPREHENSIVE ANALYSIS ===")

        # One pass: for each candidate text, the images it appears in. An
        # image counts once per text, and texts keep first-appearance order.
        sources_by_text = {}
        for image_file, candidates in all_results.items():
            for text in dict.fromkeys(candidates):
                sources_by_text.setdefault(text, []).append(image_file)

        # Collect all unique candidates with their scores
        all_unique_candidates = {}
        for text, sources in sources_by_text.items():
            base_score = self.score_maryland_likelihood(text)

            # Add multi-image bonus
            appearance_count = len(sources)
            multi_image_bonus = (appearance_count - 1) * 25

            all_unique_candidates[text] = {
                'base_score': base_score,
                'multi_image_bonus': multi_image_bonus,
                'total_score': base_score + multi_image_bonus,
                'appearances': appearance_count,
                'sources': sources,
            }

        # Sort by total score
        ranked_candidates = sorted(
            all_unique_candidates.items(),
            key=lambda item: item[1]['total_score'],
            reverse=True,
        )

        # Save detailed results
        output_file = self.output_dir / 'comprehensive_results.json'
        output_data = {
            'project_id': self.project_dir.name,
            # Timestamp of this run (previously the working directory was
            # written under this key by mistake).
            'analysis_date': datetime.now().isoformat(timespec='seconds'),
            'total_images': len(all_results),
            'total_candidates': len(all_unique_candidates),
            'ranked_candidates': [
                {
                    'text': candidate,
                    'total_score': scores['total_score'],
                    'base_score': scores['base_score'],
                    'multi_image_bonus': scores['multi_image_bonus'],
                    'appearances': scores['appearances'],
                    'sources': scores['sources'],
                }
                for candidate, scores in ranked_candidates
            ],
            'individual_results': all_results,
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2)

        # Save top candidates for easy access
        top_candidates_file = self.output_dir / 'top_candidates.txt'
        with open(top_candidates_file, 'w', encoding='utf-8') as f:
            f.write("=== TOP LICENSE PLATE CANDIDATES ===\n\n")
            f.write("Rank Candidate Total Base Multi Appears Sources\n")
            f.write("---- --------- ----- ---- ----- ------- -------\n")

            for i, (candidate, scores) in enumerate(ranked_candidates[:30], 1):
                sources = '+'.join([s.split('.')[0] for s in scores['sources']])
                f.write(f"{i:3d}. {candidate:9s} {scores['total_score']:3.0f} {scores['base_score']:3.0f} {scores['multi_image_bonus']:3.0f} {scores['appearances']:7d} {sources}\n")

            f.write(f"\n=== RECOMMENDATIONS ===\n")
            f.write("Start Maryland DMV search with these high-scoring candidates:\n\n")

            high_score_candidates = [
                item for item in ranked_candidates
                if item[1]['total_score'] >= 50
            ]
            for i, (candidate, scores) in enumerate(high_score_candidates[:15], 1):
                confidence_level = "HIGH" if scores['total_score'] >= 80 else "MEDIUM" if scores['total_score'] >= 60 else "GOOD"
                multi_img = " (BOTH IMAGES)" if scores['appearances'] > 1 else ""
                f.write(f" {i:2d}. {candidate:8s} (Score: {scores['total_score']:3.0f}, {confidence_level}){multi_img}\n")

        print(f"✓ Analysis complete!")
        print(f"✓ Detailed results saved to: {output_file}")
        print(f"✓ Top candidates saved to: {top_candidates_file}")
        print(f"✓ Found {len(all_unique_candidates)} unique candidates")
        print(f"✓ {len([c for c in all_unique_candidates.values() if c['total_score'] >= 50])} high-confidence candidates")

    def score_maryland_likelihood(self, text):
        """Score how likely a candidate is to be a Maryland license plate.

        Args:
            text: Candidate text (upper-case letters and digits).

        Returns:
            Non-negative heuristic score; 0 for text shorter than 2 characters.
        """
        if not text or len(text) < 2:
            return 0

        score = 0
        length = len(text)

        # Length scoring - Maryland plates are typically 6-7 characters
        if length == 7:
            score += 40
        elif length == 6:
            score += 35
        elif length == 5:
            score += 20
        elif length == 4:
            score += 15
        elif length == 8:
            score += 10
        else:
            score += 5

        # Character composition
        has_letter = any(c.isalpha() for c in text)
        has_number = any(c.isdigit() for c in text)

        if has_letter and has_number:
            score += 30
        elif has_letter or has_number:
            score += 15

        # Maryland-specific patterns
        if length == 7:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 50  # ABC1234 - most common Maryland format
            elif text[0].isdigit() and text[1:4].isalpha() and text[4:].isdigit():
                score += 40  # 1ABC234 - also common
        elif length == 6:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 40  # ABC123
            elif text[:2].isalpha() and text[2:].isdigit():
                score += 30  # AB1234

        unique_chars = len(set(text))

        # Penalize very short results or all same character
        if length <= 2:
            score -= 20
        if unique_chars == 1:  # All same character
            score -= 30

        # Bonus for realistic character diversity
        if unique_chars >= 4:
            score += 15
        elif unique_chars >= 3:
            score += 10

        # Penalize common OCR errors/noise
        noise_patterns = ['SSS', 'EEE', 'AAA', 'OOO', '111', '000']
        if text in noise_patterns:
            score -= 40

        # Penalize obviously wrong patterns
        if re.match(r'^[A-Z]{1,2}$', text) or re.match(r'^[0-9]{1,2}$', text):
            score -= 20

        return max(0, score)


def main():
    """Command-line entry point: process one image or a whole project."""
    parser = argparse.ArgumentParser(
        description='License Plate Detection for Projects'
    )
    parser.add_argument('--project-id', type=int, required=True, help='Project ID')
    parser.add_argument('--image', help='Specific image to process')

    args = parser.parse_args()

    # Projects live in zero-padded folders, e.g. projects/007.
    project_dir = Path(f"projects/{args.project_id:03d}")
    if not project_dir.exists():
        print(f"Project {args.project_id:03d} does not exist. Create it first.")
        return

    detector = ProjectDetector(project_dir)

    if args.image:
        # Process specific image
        image_path = project_dir / 'raw' / args.image
        if not image_path.exists():
            print(f"Image {args.image} not found in project {args.project_id:03d}")
            return

        results = detector.process_image(image_path)
        print(f"\nResults for {args.image}: {len(results)} candidates")
        for i, result in enumerate(results[:10], 1):
            print(f" {i}. {result}")
    else:
        # Analyze entire project
        detector.analyze_project()


if __name__ == '__main__':
    main()