#!/usr/bin/env python3 """ M4B Chapter Extractor A command-line tool to extract chapters from M4B audiobook files and save them as individual MP3 files. Requirements: - ffmpeg installed and accessible in PATH - Python 3.6+ Usage: python m4b_chapter_extractor.py input.m4b -o output_folder """ import argparse import os import sys import subprocess import json import re from pathlib import Path from typing import List, Dict, Optional class M4BChapterExtractor: def __init__(self, input_file: str, output_dir: str, quality: str = "192k"): self.input_file = Path(input_file) self.output_dir = Path(output_dir) self.quality = quality # Validate input file if not self.input_file.exists(): raise FileNotFoundError(f"Input file not found: {input_file}") if not self.input_file.suffix.lower() in ['.m4b', '.m4a']: raise ValueError("Input file must be an M4B or M4A file") # Create output directory self.output_dir.mkdir(parents=True, exist_ok=True) def check_ffmpeg(self) -> bool: """Check if FFmpeg is available in the system PATH.""" try: result = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True) return result.returncode == 0 except FileNotFoundError: return False def get_chapters(self) -> List[Dict]: """Extract chapter information from the M4B file.""" cmd = [ 'ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_chapters', str(self.input_file) ] try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) data = json.loads(result.stdout) return data.get('chapters', []) except subprocess.CalledProcessError as e: print(f"Error getting chapters: {e}") return [] except json.JSONDecodeError as e: print(f"Error parsing chapter data: {e}") return [] def sanitize_filename(self, filename: str) -> str: """Sanitize filename by removing/replacing invalid characters.""" # Remove or replace invalid characters filename = re.sub(r'[<>:"/\\|?*]', '', filename) filename = re.sub(r'\s+', ' ', filename).strip() # Ensure filename isn't too long (limit to 200 characters) if len(filename) > 200: filename = filename[:200].strip() return filename or "Chapter" def format_time(self, seconds: float) -> str: """Convert seconds to HH:MM:SS.mmm format.""" hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) secs = seconds % 60 return f"{hours:02d}:{minutes:02d}:{secs:06.3f}" def extract_chapter(self, chapter: Dict, chapter_num: int, total_chapters: int) -> bool: """Extract a single chapter to MP3 file.""" # Get chapter title title = chapter.get('tags', {}).get('title', f"Chapter {chapter_num:02d}") title = self.sanitize_filename(title) # Create output filename output_filename = f"{chapter_num:02d} - {title}.mp3" output_path = self.output_dir / output_filename # Get start and end times start_time = float(chapter['start_time']) end_time = float(chapter['end_time']) duration = end_time - start_time print(f"Extracting [{chapter_num}/{total_chapters}]: {title}") print(f" Duration: {self.format_time(duration)}") # FFmpeg command to extract chapter cmd = [ 'ffmpeg', '-i', str(self.input_file), '-ss', str(start_time), '-t', str(duration), '-acodec', 'libmp3lame', '-ab', self.quality, '-map_metadata', '0', '-id3v2_version', '3', '-metadata', f'title={title}', '-metadata', f'track={chapter_num}/{total_chapters}', '-y', # Overwrite output file str(output_path) ] try: result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode == 0: print(f" ✓ Saved: {output_filename}") return True else: print(f" ✗ Error extracting chapter: {result.stderr}") return False except Exception as e: print(f" ✗ Exception during extraction: {e}") return False def extract_all_chapters(self) -> bool: """Extract all chapters from the M4B file.""" print(f"Processing: {self.input_file.name}") print(f"Output directory: {self.output_dir}") # Check if FFmpeg is available if not self.check_ffmpeg(): print("Error: FFmpeg not found. Please install FFmpeg and ensure it's in your PATH.") return False # Get chapters chapters = self.get_chapters() if not chapters: print("No chapters found in the M4B file.") return False print(f"Found {len(chapters)} chapters") print("-" * 50) # Extract each chapter success_count = 0 for i, chapter in enumerate(chapters, 1): if self.extract_chapter(chapter, i, len(chapters)): success_count += 1 print() # Summary print("-" * 50) print(f"Extraction complete: {success_count}/{len(chapters)} chapters extracted successfully") if success_count == len(chapters): print("All chapters extracted successfully!") return True else: print(f"Warning: {len(chapters) - success_count} chapters failed to extract") return False def main(): parser = argparse.ArgumentParser( description="Extract chapters from M4B audiobook files as individual MP3 files", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: python m4b_chapter_extractor.py audiobook.m4b -o chapters/ python m4b_chapter_extractor.py audiobook.m4b -o output/ -q 128k python m4b_chapter_extractor.py audiobook.m4b -o output/ --quality 256k Requirements: - FFmpeg must be installed and accessible in PATH - Input file must be M4B or M4A format """ ) parser.add_argument( 'input_file', help='Path to the input M4B audiobook file' ) parser.add_argument( '-o', '--output', required=True, help='Output directory for extracted MP3 chapters' ) parser.add_argument( '-q', '--quality', default='192k', help='MP3 audio quality/bitrate (default: 192k). Examples: 128k, 192k, 256k, 320k' ) parser.add_argument( '-v', '--verbose', action='store_true', help='Enable verbose output' ) args = parser.parse_args() try: # Create extractor instance extractor = M4BChapterExtractor( input_file=args.input_file, output_dir=args.output, quality=args.quality ) # Extract chapters success = extractor.extract_all_chapters() # Exit with appropriate code sys.exit(0 if success else 1) except FileNotFoundError as e: print(f"Error: {e}") sys.exit(1) except ValueError as e: print(f"Error: {e}") sys.exit(1) except KeyboardInterrupt: print("\nOperation cancelled by user") sys.exit(1) except Exception as e: print(f"Unexpected error: {e}") sys.exit(1) if __name__ == "__main__": main()