#!/usr/bin/env python3
"""
License Plate Detection for Projects
Uses project-specific parameters and saves results to project output folder.
"""
import json
import re
import argparse
from datetime import datetime
from pathlib import Path

import cv2
import numpy as np
import pytesseract
from PIL import Image

class ProjectDetector:
    def __init__(self, project_dir):
        self.project_dir = Path(project_dir)
        self.raw_dir = self.project_dir / 'raw'
        self.debug_dir = self.project_dir / 'debug'
        self.output_dir = self.project_dir / 'output'

        # Ensure directories exist
        self.debug_dir.mkdir(exist_ok=True)
        self.output_dir.mkdir(exist_ok=True)

        # Load project parameters if available
        self.params = self.load_detection_parameters()
    def load_detection_parameters(self):
        """Load detection parameters from project annotations or use defaults."""
        params_file = self.project_dir / 'debug' / 'detection_parameters.json'
        if params_file.exists():
            with open(params_file, 'r') as f:
                params = json.load(f)
            print(f"✓ Loaded detection parameters from {params_file}")
            return params
        else:
            # Use aggressive default parameters
            params = {
                'min_width': 30,
                'max_width': 1200,
                'min_height': 15,
                'max_height': 600,
                'min_aspect_ratio': 0.8,
                'max_aspect_ratio': 12.0,
                'min_area': 450,
                'max_area': 720000
            }
            print("⚠ Using default aggressive parameters (no annotations found)")
            return params
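
    # Example debug/detection_parameters.json, using the same keys (and here
    # the same values) as the defaults above:
    # {
    #     "min_width": 30, "max_width": 1200,
    #     "min_height": 15, "max_height": 600,
    #     "min_aspect_ratio": 0.8, "max_aspect_ratio": 12.0,
    #     "min_area": 450, "max_area": 720000
    # }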
    def score_license_plate_likelihood(self, bbox, area, solidity, extent):
        """Score how likely this region is to be a license plate."""
        x, y, w, h = bbox
        aspect_ratio = w / float(h)
        score = 0

        # Size scoring - prefer license plate-like sizes
        if 50 <= w <= 600 and 20 <= h <= 200:
            score += 30
        elif 30 <= w <= 800 and 15 <= h <= 300:
            score += 20
        else:
            score += 5

        # Aspect ratio scoring
        if 1.5 <= aspect_ratio <= 6.0:
            score += 40
        elif 1.0 <= aspect_ratio <= 8.0:
            score += 25
        else:
            score += 10

        # Area scoring
        if 1000 <= area <= 120000:
            score += 20
        elif 500 <= area <= 200000:
            score += 15
        else:
            score += 5

        # Geometric quality
        if solidity > 0.3:
            score += 15
        if extent > 0.3:
            score += 15

        # Bonus for rectangular shapes
        if 0.7 <= extent <= 1.0 and solidity > 0.7:
            score += 20

        return score
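
    # Worked example of the scoring above: a 200x100 px box (AR 2.0) with
    # contour area 15000, solidity 0.8, and extent 0.75 scores
    # 30 + 40 + 20 + 15 + 15 + 20 = 140, which is also the maximum possible.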
    def comprehensive_preprocessing(self, image):
        """Apply comprehensive preprocessing to maximize detection."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
        methods = []

        # 1. Original
        methods.append(('original', gray))

        # 2. Multiple contrast enhancements
        clahe1 = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced1 = clahe1.apply(gray)
        methods.append(('clahe_2', enhanced1))
        clahe2 = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        enhanced2 = clahe2.apply(gray)
        methods.append(('clahe_4', enhanced2))

        # 3. Histogram equalization
        hist_eq = cv2.equalizeHist(gray)
        methods.append(('hist_eq', hist_eq))

        # 4. Multiple bilateral filters
        bilateral1 = cv2.bilateralFilter(gray, 9, 75, 75)
        methods.append(('bilateral_9', bilateral1))
        bilateral2 = cv2.bilateralFilter(gray, 15, 80, 80)
        methods.append(('bilateral_15', bilateral2))

        # 5. Gaussian blurs
        gaussian1 = cv2.GaussianBlur(gray, (3, 3), 0)
        methods.append(('gaussian_3', gaussian1))
        gaussian2 = cv2.GaussianBlur(gray, (5, 5), 0)
        methods.append(('gaussian_5', gaussian2))

        # 6. Morphological operations
        kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        morph1 = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel3)
        methods.append(('morph_close_3', morph1))
        kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        morph2 = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel5)
        methods.append(('morph_open_5', morph2))

        # 7. Sharpening
        sharpening_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        sharpened = cv2.filter2D(gray, -1, sharpening_kernel)
        methods.append(('sharpened', sharpened))

        # 8. Unsharp masking
        gaussian_blur = cv2.GaussianBlur(gray, (9, 9), 10.0)
        unsharp = cv2.addWeighted(gray, 1.5, gaussian_blur, -0.5, 0)
        methods.append(('unsharp', unsharp))

        # 9. Top-hat filtering
        tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel5)
        methods.append(('tophat', tophat))

        # 10. Bottom-hat filtering
        blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel5)
        methods.append(('blackhat', blackhat))

        return methods
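
    # Note: the method above returns 14 grayscale variants; combined with the
    # 13 edge detectors below, process_image() sweeps 14 x 13 = 182 pipeline
    # combinations per input image.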
    def comprehensive_edge_detection(self, image):
        """Apply comprehensive edge detection methods."""
        methods = []

        # Multiple Canny thresholds
        canny_configs = [
            (20, 60), (30, 90), (40, 120), (50, 150),
            (60, 180), (80, 200), (100, 250), (30, 200)
        ]
        for low, high in canny_configs:
            canny = cv2.Canny(image, low, high)
            methods.append((f'canny_{low}_{high}', canny))

        # Sobel edges (guard the normalization against division by zero on flat images)
        sobelx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)
        sobel = np.sqrt(sobelx**2 + sobely**2)
        sobel_norm = np.uint8(sobel * 255 / max(np.max(sobel), 1))
        methods.append(('sobel_3', sobel_norm))

        # Sobel with different kernel sizes
        sobelx5 = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
        sobely5 = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5)
        sobel5 = np.sqrt(sobelx5**2 + sobely5**2)
        sobel5_norm = np.uint8(sobel5 * 255 / max(np.max(sobel5), 1))
        methods.append(('sobel_5', sobel5_norm))

        # Laplacian
        laplacian = cv2.Laplacian(image, cv2.CV_64F)
        laplacian_norm = np.uint8(np.absolute(laplacian))
        methods.append(('laplacian', laplacian_norm))

        # Laplacian of Gaussian
        gaussian = cv2.GaussianBlur(image, (3, 3), 0)
        log = cv2.Laplacian(gaussian, cv2.CV_64F)
        log_norm = np.uint8(np.absolute(log))
        methods.append(('log', log_norm))

        # Scharr edges
        scharrx = cv2.Scharr(image, cv2.CV_64F, 1, 0)
        scharry = cv2.Scharr(image, cv2.CV_64F, 0, 1)
        scharr = np.sqrt(scharrx**2 + scharry**2)
        scharr_norm = np.uint8(scharr * 255 / max(np.max(scharr), 1))
        methods.append(('scharr', scharr_norm))

        return methods
    def find_all_potential_plates(self, edge_image, method_name=""):
        """Find ALL potential license plate regions with very loose filtering."""
        contours, _ = cv2.findContours(edge_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        candidates = []
        for contour in contours:
            # Basic measurements
            x, y, w, h = cv2.boundingRect(contour)
            aspect_ratio = w / float(h)
            area = cv2.contourArea(contour)

            # Very loose filtering
            if (self.params['min_width'] <= w <= self.params['max_width'] and
                    self.params['min_height'] <= h <= self.params['max_height'] and
                    self.params['min_aspect_ratio'] <= aspect_ratio <= self.params['max_aspect_ratio'] and
                    self.params['min_area'] <= area <= self.params['max_area']):
                # Geometric quality
                hull = cv2.convexHull(contour)
                hull_area = cv2.contourArea(hull)
                solidity = area / hull_area if hull_area > 0 else 0
                extent = area / (w * h)

                # Calculate likelihood score
                likelihood_score = self.score_license_plate_likelihood((x, y, w, h), area, solidity, extent)

                candidates.append({
                    'contour': contour,
                    'bbox': (x, y, w, h),
                    'aspect_ratio': aspect_ratio,
                    'area': area,
                    'solidity': solidity,
                    'extent': extent,
                    'likelihood_score': likelihood_score,
                    'method': method_name
                })
        return candidates
    def save_comprehensive_visualization(self, image, candidates, method_name, top_n=20):
        """Save visualization with many candidates."""
        if len(image.shape) == 2:
            vis_img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        else:
            vis_img = image.copy()

        # Sort by likelihood score
        candidates_sorted = sorted(candidates, key=lambda x: x['likelihood_score'], reverse=True)

        colors = [
            (0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
            (0, 255, 255), (128, 255, 0), (255, 128, 0), (128, 0, 255), (0, 128, 255),
            (255, 255, 128), (255, 128, 255), (128, 255, 255), (192, 192, 0), (192, 0, 192),
            (0, 192, 192), (64, 255, 64), (255, 64, 64), (64, 64, 255), (128, 128, 128)
        ]
        for i, candidate in enumerate(candidates_sorted[:top_n]):
            x, y, w, h = candidate['bbox']
            color = colors[i % len(colors)]

            # Draw rectangle
            thickness = 3 if i < 5 else 2
            cv2.rectangle(vis_img, (x, y), (x + w, y + h), color, thickness)

            # Add label
            label = f"#{i+1}:{candidate['likelihood_score']:.0f}"
            font_scale = 0.7 if i < 5 else 0.5
            cv2.putText(vis_img, label, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)

            # Add size info for top candidates
            if i < 10:
                size_label = f"{w}x{h}"
                cv2.putText(vis_img, size_label, (x, y + h + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

        output_path = self.debug_dir / f"comprehensive_{method_name}.jpg"
        cv2.imwrite(str(output_path), vis_img)
        return output_path
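
    # Note: the color tuples above are in OpenCV's BGR order, so (0, 255, 0)
    # is green and (255, 0, 0) is blue.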
    def extract_and_ocr_region(self, image, candidate, candidate_id):
        """Extract region and try comprehensive OCR."""
        x, y, w, h = candidate['bbox']

        # Add padding
        padding = max(5, min(w, h) // 10)
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(image.shape[1], x + w + padding)
        y2 = min(image.shape[0], y + h + padding)
        region = image[y1:y2, x1:x2]
        if region.size == 0:
            return []

        # Save original region
        cv2.imwrite(str(self.debug_dir / f"region_{candidate_id:02d}_original.jpg"), region)

        # Multiple preprocessing approaches
        preprocessed = []

        # 1. Original
        preprocessed.append(('original', region))

        # 2. Multiple thresholding methods
        _, otsu = cv2.threshold(region, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        preprocessed.append(('otsu', otsu))
        _, inv_otsu = cv2.threshold(region, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        preprocessed.append(('inv_otsu', inv_otsu))

        # 3. Adaptive thresholding with different parameters
        if region.shape[0] > 10 and region.shape[1] > 10:
            adaptive1 = cv2.adaptiveThreshold(region, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
            preprocessed.append(('adaptive_11_2', adaptive1))
            if min(region.shape) > 20:
                adaptive2 = cv2.adaptiveThreshold(region, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 5)
                preprocessed.append(('adaptive_15_5', adaptive2))

        # 4. Morphological operations
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        if len(otsu.shape) == 2:
            morph_close = cv2.morphologyEx(otsu, cv2.MORPH_CLOSE, kernel)
            preprocessed.append(('morph_close', morph_close))
            morph_open = cv2.morphologyEx(otsu, cv2.MORPH_OPEN, kernel)
            preprocessed.append(('morph_open', morph_open))

        # 5. Contrast enhancement
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(region)
        preprocessed.append(('enhanced', enhanced))

        # 6. Resize if too small
        if w < 100 or h < 25:
            scale_factor = max(100 / w, 25 / h, 2.0)
            new_w, new_h = int(w * scale_factor), int(h * scale_factor)
            resized = cv2.resize(region, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
            preprocessed.append(('resized', resized))

            # Also resize thresholded versions
            resized_otsu = cv2.resize(otsu, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
            preprocessed.append(('resized_otsu', resized_otsu))

        # OCR configurations
        ocr_configs = [
            ('psm6', '--psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm7', '--psm 7 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm8', '--psm 8 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm10', '--psm 10 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
            ('psm13', '--psm 13'),
            ('default', ''),
            ('digits', '-c tessedit_char_whitelist=0123456789'),
            ('letters', '-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
        ]
        results = []
        for preprocess_name, processed_img in preprocessed:
            # Save preprocessed image
            cv2.imwrite(str(self.debug_dir / f"region_{candidate_id:02d}_{preprocess_name}.jpg"), processed_img)
            for config_name, config in ocr_configs:
                try:
                    pil_img = Image.fromarray(processed_img)
                    text = pytesseract.image_to_string(pil_img, config=config).strip()

                    # Clean text
                    clean_text = re.sub(r'[^A-Z0-9]', '', text.upper())
                    if len(clean_text) >= 2:  # Very loose requirement
                        confidence = self.calculate_comprehensive_confidence(clean_text, candidate)
                        results.append({
                            'text': clean_text,
                            'confidence': confidence,
                            'preprocessing': preprocess_name,
                            'config': config_name,
                            'raw_text': text,
                            'candidate_score': candidate['likelihood_score']
                        })
                except Exception:
                    continue
        return results
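
    # Caveat (worth verifying against your Tesseract build): the LSTM engine
    # in Tesseract 4.0 ignored tessedit_char_whitelist; 4.1+ honors it again,
    # so the whitelist-based configs above can behave differently by version.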
    def calculate_comprehensive_confidence(self, text, candidate):
        """Calculate confidence for any potential license plate text."""
        if not text or len(text) < 2:
            return 0
        score = 0

        # Length scoring
        if 6 <= len(text) <= 8:
            score += 30
        elif 4 <= len(text) <= 9:
            score += 20
        elif 3 <= len(text) <= 10:
            score += 10
        else:
            score += 5

        # Character composition
        has_letter = any(c.isalpha() for c in text)
        has_number = any(c.isdigit() for c in text)
        if has_letter and has_number:
            score += 25
        elif has_letter or has_number:
            score += 15

        # Maryland-specific patterns
        if len(text) == 7:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 30  # ABC1234
            elif text[0].isdigit() and text[1:4].isalpha() and text[4:].isdigit():
                score += 25  # 1ABC234
        elif len(text) == 6:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 25  # ABC123
            elif text[:2].isalpha() and text[2:].isdigit():
                score += 20  # AB1234

        # Geometric bonus
        score += candidate['likelihood_score'] * 0.3

        # Penalize too many repeated characters
        unique_chars = len(set(text))
        if unique_chars < len(text) * 0.5:
            score -= 15

        # Bonus for reasonable character diversity
        if unique_chars >= 3:
            score += 10

        return max(0, score)
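
    # Worked example: text "ABC1234" from a candidate with likelihood score 140
    # scores 30 (length 7) + 25 (letters + digits) + 30 (ABC1234 pattern)
    # + 42 (0.3 * 140 geometric bonus) + 10 (diversity) = 137.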
    def remove_overlapping_candidates(self, candidates, overlap_threshold=0.3):
        """Remove overlapping candidates, keeping highest scoring ones."""
        if not candidates:
            return []

        # Sort by likelihood score
        candidates.sort(key=lambda x: x['likelihood_score'], reverse=True)
        unique = []
        for candidate in candidates:
            bbox1 = candidate['bbox']
            is_duplicate = False
            for existing in unique:
                bbox2 = existing['bbox']
                if self.calculate_overlap(bbox1, bbox2) > overlap_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                unique.append(candidate)
        return unique
    def calculate_overlap(self, bbox1, bbox2):
        """Calculate intersection over union."""
        x1, y1, w1, h1 = bbox1
        x2, y2, w2, h2 = bbox2
        x_left = max(x1, x2)
        y_top = max(y1, y2)
        x_right = min(x1 + w1, x2 + w2)
        y_bottom = min(y1 + h1, y2 + h2)
        if x_right < x_left or y_bottom < y_top:
            return 0.0
        intersection = (x_right - x_left) * (y_bottom - y_top)
        area1 = w1 * h1
        area2 = w2 * h2
        union = area1 + area2 - intersection
        return intersection / union if union > 0 else 0.0
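
    # Worked IoU example: boxes (0, 0, 100, 50) and (50, 0, 100, 50) overlap in
    # a 50x50 region, so IoU = 2500 / (5000 + 5000 - 2500) = 1/3, above the
    # default 0.3 threshold, so the lower-scoring box would be dropped.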
    def process_image(self, image_path):
        """Process a single image with comprehensive detection."""
        print(f"\n=== PROCESSING: {Path(image_path).name} ===")

        # Load image
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not load image: {image_path}")
            return []

        # Save original
        cv2.imwrite(str(self.debug_dir / f"00_original_{Path(image_path).stem}.jpg"), image)

        # Comprehensive preprocessing
        preprocessed_images = self.comprehensive_preprocessing(image)
        all_candidates = []
        method_count = 0
        print(f"Testing {len(preprocessed_images)} preprocessing methods...")
        for preprocess_name, preprocessed_img in preprocessed_images:
            print(f"\n Preprocessing: {preprocess_name}")

            # Save preprocessed image
            cv2.imwrite(str(self.debug_dir / f"01_{preprocess_name}_{Path(image_path).stem}.jpg"), preprocessed_img)

            # Comprehensive edge detection
            edge_methods = self.comprehensive_edge_detection(preprocessed_img)
            for edge_name, edge_img in edge_methods:
                method_name = f"{preprocess_name}_{edge_name}"
                method_count += 1

                # Save edge image
                cv2.imwrite(str(self.debug_dir / f"02_{method_name}_{Path(image_path).stem}.jpg"), edge_img)

                # Find candidates
                candidates = self.find_all_potential_plates(edge_img, method_name)
                if candidates:
                    print(f" {edge_name}: {len(candidates)} candidates")

                    # Add method info
                    for candidate in candidates:
                        candidate['full_method'] = method_name
                        candidate['preprocessing'] = preprocess_name
                        candidate['edge_detection'] = edge_name
                    all_candidates.extend(candidates)

                    # Save visualization for this method
                    self.save_comprehensive_visualization(image, candidates, method_name, top_n=10)

        print(f"\nProcessed {method_count} total method combinations")
        print(f"Found {len(all_candidates)} total candidates")
        if not all_candidates:
            print("No candidates found even with aggressive parameters!")
            return []

        # Remove overlapping candidates
        unique_candidates = self.remove_overlapping_candidates(all_candidates, overlap_threshold=0.2)
        print(f"After removing overlaps: {len(unique_candidates)} unique candidates")

        # Sort by likelihood score
        unique_candidates.sort(key=lambda x: x['likelihood_score'], reverse=True)

        # Show top candidates
        print("\nTop 20 candidates by likelihood score:")
        for i, candidate in enumerate(unique_candidates[:20], 1):
            x, y, w, h = candidate['bbox']
            print(f" {i:2d}. {w:3d}x{h:3d} at ({x:3d},{y:3d}) - Score: {candidate['likelihood_score']:5.1f} - AR: {candidate['aspect_ratio']:.2f} - {candidate['full_method']}")

        # Create overall visualization
        self.save_comprehensive_visualization(image, unique_candidates, f"all_methods_{Path(image_path).stem}", top_n=30)

        # Extract and OCR top candidates
        print("\nExtracting and OCR'ing top 15 candidates...")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        all_ocr_results = []
        for i, candidate in enumerate(unique_candidates[:15]):
            print(f"\nCandidate {i+1}: {candidate['bbox'][2]}x{candidate['bbox'][3]} (score: {candidate['likelihood_score']:.1f})")
            ocr_results = self.extract_and_ocr_region(gray, candidate, i + 1)
            all_ocr_results.extend(ocr_results)

            # Print OCR results for this candidate
            for result in ocr_results:
                if result['confidence'] > 10:
                    print(f" OCR: '{result['text']}' (conf: {result['confidence']:.1f}, {result['preprocessing']}+{result['config']})")

        # Sort all OCR results by confidence
        if all_ocr_results:
            # Remove duplicates, keeping highest confidence
            unique_ocr = {}
            for result in all_ocr_results:
                text = result['text']
                if text not in unique_ocr or result['confidence'] > unique_ocr[text]['confidence']:
                    unique_ocr[text] = result
            sorted_results = sorted(unique_ocr.values(), key=lambda x: x['confidence'], reverse=True)
            print("\n=== ALL OCR RESULTS (Top 20) ===")
            for i, result in enumerate(sorted_results[:20], 1):
                print(f"{i:2d}. '{result['text']}' (confidence: {result['confidence']:.1f}) - {result['preprocessing']}+{result['config']}")
            return [r['text'] for r in sorted_results if r['confidence'] > 5]

        print("No valid OCR results found.")
        return []
    def analyze_project(self):
        """Analyze all images in the project."""
        # Get all images from raw directory
        image_files = list(self.raw_dir.glob('*'))
        image_files = [f for f in image_files if f.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp']]
        if not image_files:
            print(f"No image files found in {self.raw_dir}")
            return

        print(f"=== PROJECT ANALYSIS: {len(image_files)} images ===")
        all_results = {}
        for image_file in image_files:
            results = self.process_image(image_file)
            all_results[image_file.name] = results

        # Generate comprehensive analysis
        self.generate_comprehensive_analysis(all_results)
        return all_results
    def generate_comprehensive_analysis(self, all_results):
        """Generate comprehensive analysis and save to output folder."""
        print("\n=== GENERATING COMPREHENSIVE ANALYSIS ===")

        # Collect all unique candidates with their scores
        all_unique_candidates = {}
        for image_file, candidates in all_results.items():
            for candidate in candidates:
                if candidate not in all_unique_candidates:
                    base_score = self.score_maryland_likelihood(candidate)

                    # Add multi-image bonus
                    appearance_count = sum(1 for cands in all_results.values() if candidate in cands)
                    multi_image_bonus = (appearance_count - 1) * 25
                    total_score = base_score + multi_image_bonus
                    all_unique_candidates[candidate] = {
                        'base_score': base_score,
                        'multi_image_bonus': multi_image_bonus,
                        'total_score': total_score,
                        'appearances': appearance_count,
                        'sources': [img for img, cands in all_results.items() if candidate in cands]
                    }

        # Sort by total score
        ranked_candidates = sorted(all_unique_candidates.items(), key=lambda x: x[1]['total_score'], reverse=True)

        # Save detailed results
        output_file = self.output_dir / 'comprehensive_results.json'
        output_data = {
            'project_id': self.project_dir.name,
            'analysis_date': datetime.now().isoformat(),
            'total_images': len(all_results),
            'total_candidates': len(all_unique_candidates),
            'ranked_candidates': [
                {
                    'text': candidate,
                    'total_score': scores['total_score'],
                    'base_score': scores['base_score'],
                    'multi_image_bonus': scores['multi_image_bonus'],
                    'appearances': scores['appearances'],
                    'sources': scores['sources']
                }
                for candidate, scores in ranked_candidates
            ],
            'individual_results': all_results
        }
        with open(output_file, 'w') as f:
            json.dump(output_data, f, indent=2)

        # Save top candidates for easy access
        top_candidates_file = self.output_dir / 'top_candidates.txt'
        with open(top_candidates_file, 'w') as f:
            f.write("=== TOP LICENSE PLATE CANDIDATES ===\n\n")
            f.write("Rank Candidate Total Base Multi Appears Sources\n")
            f.write("---- --------- ----- ---- ----- ------- -------\n")
            for i, (candidate, scores) in enumerate(ranked_candidates[:30], 1):
                sources = '+'.join([s.split('.')[0] for s in scores['sources']])
                f.write(f"{i:3d}. {candidate:9s} {scores['total_score']:3.0f} {scores['base_score']:3.0f} {scores['multi_image_bonus']:3.0f} {scores['appearances']:7d} {sources}\n")
            f.write("\n=== RECOMMENDATIONS ===\n")
            f.write("Start Maryland DMV search with these high-scoring candidates:\n\n")
            high_score_candidates = [item for item in ranked_candidates if item[1]['total_score'] >= 50]
            for i, (candidate, scores) in enumerate(high_score_candidates[:15], 1):
                confidence_level = "HIGH" if scores['total_score'] >= 80 else "MEDIUM" if scores['total_score'] >= 60 else "GOOD"
                multi_img = " (BOTH IMAGES)" if scores['appearances'] > 1 else ""
                f.write(f" {i:2d}. {candidate:8s} (Score: {scores['total_score']:3.0f}, {confidence_level}){multi_img}\n")

        print("✓ Analysis complete!")
        print(f"✓ Detailed results saved to: {output_file}")
        print(f"✓ Top candidates saved to: {top_candidates_file}")
        print(f"✓ Found {len(all_unique_candidates)} unique candidates")
        print(f"✓ {len([c for c in all_unique_candidates.values() if c['total_score'] >= 50])} high-confidence candidates")
    def score_maryland_likelihood(self, text):
        """Score how likely a candidate is to be a Maryland license plate."""
        if not text or len(text) < 2:
            return 0
        score = 0

        # Length scoring - Maryland plates are typically 6-7 characters
        if len(text) == 7:
            score += 40
        elif len(text) == 6:
            score += 35
        elif len(text) == 5:
            score += 20
        elif len(text) == 4:
            score += 15
        elif len(text) == 8:
            score += 10
        else:
            score += 5

        # Character composition
        has_letter = any(c.isalpha() for c in text)
        has_number = any(c.isdigit() for c in text)
        if has_letter and has_number:
            score += 30
        elif has_letter or has_number:
            score += 15

        # Maryland-specific patterns
        if len(text) == 7:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 50  # ABC1234 - most common Maryland format
            elif text[0].isdigit() and text[1:4].isalpha() and text[4:].isdigit():
                score += 40  # 1ABC234 - also common
        elif len(text) == 6:
            if text[:3].isalpha() and text[3:].isdigit():
                score += 40  # ABC123
            elif text[:2].isalpha() and text[2:].isdigit():
                score += 30  # AB1234

        # Penalize very short results or all same character
        if len(text) <= 2:
            score -= 20
        if len(set(text)) == 1:  # All same character
            score -= 30

        # Bonus for realistic character diversity
        unique_chars = len(set(text))
        if unique_chars >= 4:
            score += 15
        elif unique_chars >= 3:
            score += 10

        # Penalize common OCR errors/noise
        noise_patterns = ['SSS', 'EEE', 'AAA', 'OOO', '111', '000']
        if text in noise_patterns:
            score -= 40

        # Penalize obviously wrong patterns
        if re.match(r'^[A-Z]{1,2}$', text) or re.match(r'^[0-9]{1,2}$', text):
            score -= 20

        return max(0, score)
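
    # Worked example: "ABC1234" scores 40 (length 7) + 30 (letters + digits)
    # + 50 (ABC1234 pattern) + 15 (7 unique characters) = 135.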

def main():
    parser = argparse.ArgumentParser(description='License Plate Detection for Projects')
    parser.add_argument('--project-id', type=int, required=True, help='Project ID')
    parser.add_argument('--image', help='Specific image to process')
    args = parser.parse_args()

    project_dir = Path(f"projects/{args.project_id:03d}")
    if not project_dir.exists():
        print(f"Project {args.project_id:03d} does not exist. Create it first.")
        return

    detector = ProjectDetector(project_dir)
    if args.image:
        # Process specific image
        image_path = project_dir / 'raw' / args.image
        if not image_path.exists():
            print(f"Image {args.image} not found in project {args.project_id:03d}")
            return
        results = detector.process_image(image_path)
        print(f"\nResults for {args.image}: {len(results)} candidates")
        for i, result in enumerate(results[:10], 1):
            print(f" {i}. {result}")
    else:
        # Analyze entire project
        detector.analyze_project()


if __name__ == '__main__':
    main()