Files
ebook2audiobook/tools/m4b_chapter_extractor.py
2025-07-11 21:38:52 -04:00

245 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""
M4B Chapter Extractor
A command-line tool to extract chapters from M4B audiobook files
and save them as individual MP3 files.
Requirements:
- ffmpeg and ffprobe installed and accessible in PATH
- Python 3.7+ (subprocess.run(capture_output=...) is used)
Usage:
python m4b_chapter_extractor.py input.m4b -o output_folder
"""
import argparse
import os
import sys
import subprocess
import json
import re
from pathlib import Path
from typing import List, Dict, Optional
class M4BChapterExtractor:
    """Split an M4B/M4A audiobook into one MP3 file per chapter.

    Chapter boundaries are read with ``ffprobe`` and every chapter is
    transcoded with ``ffmpeg``; both executables must be reachable via PATH.
    """

    # Input extensions accepted by the constructor (compared case-insensitively).
    SUPPORTED_SUFFIXES = ('.m4b', '.m4a')

    def __init__(self, input_file: str, output_dir: str, quality: str = "192k"):
        """Validate the input file and make sure the output directory exists.

        Args:
            input_file: Path to the source ``.m4b`` / ``.m4a`` file.
            output_dir: Directory that receives the MP3 files; created
                (including parents) if it does not exist yet.
            quality: Audio bitrate handed to ffmpeg, e.g. ``"192k"``.

        Raises:
            FileNotFoundError: If ``input_file`` does not exist.
            ValueError: If ``input_file`` has an unsupported extension.
        """
        self.input_file = Path(input_file)
        self.output_dir = Path(output_dir)
        self.quality = quality
        # Validate input file
        if not self.input_file.exists():
            raise FileNotFoundError(f"Input file not found: {input_file}")
        if self.input_file.suffix.lower() not in self.SUPPORTED_SUFFIXES:
            raise ValueError("Input file must be an M4B or M4A file")
        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _run(cmd: List[str], check: bool = False) -> subprocess.CompletedProcess:
        """Run ``cmd`` and capture its output as text.

        Output is decoded explicitly as UTF-8 with replacement so chapter
        titles do not depend on the locale codec and undecodable bytes in the
        tool output can never raise ``UnicodeDecodeError``.
        """
        return subprocess.run(
            cmd,
            capture_output=True,
            encoding='utf-8',
            errors='replace',
            check=check,
        )

    def check_ffmpeg(self) -> bool:
        """Check if FFmpeg (both ``ffmpeg`` and ``ffprobe``) is available in PATH.

        Returns:
            True when both tools start and exit with status 0, else False.
        """
        # ffprobe is needed by get_chapters(), so it is verified here as well.
        for tool in ('ffmpeg', 'ffprobe'):
            try:
                result = self._run([tool, '-version'])
            except OSError:
                # Executable missing or not runnable.
                return False
            if result.returncode != 0:
                return False
        return True

    def get_chapters(self) -> List[Dict]:
        """Extract chapter information from the M4B file.

        Returns:
            The list stored under ``"chapters"`` in ffprobe's JSON output, or
            an empty list when ffprobe cannot be run, fails, or prints
            something that is not valid JSON.
        """
        cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_chapters',
            str(self.input_file)
        ]
        try:
            result = self._run(cmd, check=True)
            data = json.loads(result.stdout)
        except OSError as e:
            # ffprobe is not installed / not executable.
            print(f"Error running ffprobe: {e}")
            return []
        except subprocess.CalledProcessError as e:
            print(f"Error getting chapters: {e}")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing chapter data: {e}")
            return []
        return data.get('chapters', [])

    def sanitize_filename(self, filename: str) -> str:
        """Sanitize filename by removing/replacing invalid characters.

        Removes ``< > : " / \\ | ? *`` and non-whitespace control characters,
        collapses whitespace runs to a single space, trims the ends and caps
        the length at 200 characters.

        Returns:
            The cleaned name, or ``"Chapter"`` if nothing is left.
        """
        # Whitespace control characters (tab, newline, ...) are deliberately
        # not in this class: the next substitution turns them into spaces.
        filename = re.sub(r'[<>:"/\\|?*\x00-\x08\x0e-\x1b\x7f]', '', filename)
        filename = re.sub(r'\s+', ' ', filename).strip()
        # Ensure filename isn't too long (limit to 200 characters)
        if len(filename) > 200:
            filename = filename[:200].strip()
        return filename or "Chapter"

    def format_time(self, seconds: float) -> str:
        """Convert seconds to HH:MM:SS.mmm format.

        The value is rounded to whole milliseconds before it is split into
        fields, so rounding carries into the next unit (59.9996 s becomes
        ``00:01:00.000`` rather than ``00:00:60.000``).
        """
        total_ms = int(round(seconds * 1000))
        total_secs, millis = divmod(total_ms, 1000)
        total_minutes, secs = divmod(total_secs, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    def extract_chapter(self, chapter: Dict, chapter_num: int, total_chapters: int) -> bool:
        """Extract a single chapter to MP3 file.

        Args:
            chapter: One chapter entry from ``get_chapters()``; must provide
                ``start_time`` and ``end_time`` and may provide
                ``tags.title``.
            chapter_num: 1-based position of the chapter.
            total_chapters: Total number of chapters (used for the track tag
                and for the zero-padding width of the file name).

        Returns:
            True if ffmpeg exited with status 0, False otherwise (including
            malformed chapter timing data).
        """
        # Pad to at least two digits, wider when the book has 100+ chapters,
        # so the files always sort in playback order.
        width = max(2, len(str(total_chapters)))
        # Get chapter title
        tags = chapter.get('tags') or {}
        raw_title = tags.get('title', f"Chapter {chapter_num:0{width}d}")
        title = self.sanitize_filename(raw_title)
        # The ID3 tag keeps the real title (punctuation intact); only the
        # file name needs the filesystem-safe variant.
        metadata_title = ' '.join(str(raw_title).split()) or title
        # Create output filename
        output_filename = f"{chapter_num:0{width}d} - {title}.mp3"
        output_path = self.output_dir / output_filename
        # Get start and end times
        try:
            start_time = float(chapter['start_time'])
            end_time = float(chapter['end_time'])
        except (KeyError, TypeError, ValueError) as e:
            print(f" ✗ Invalid chapter timing data: {e!r}")
            return False
        duration = end_time - start_time
        print(f"Extracting [{chapter_num}/{total_chapters}]: {title}")
        print(f" Duration: {self.format_time(duration)}")
        # FFmpeg command to extract chapter
        cmd = [
            'ffmpeg',
            # -ss before -i seeks in the input instead of decoding and
            # discarding everything that precedes the chapter.
            # Fixed-point formatting avoids scientific notation.
            '-ss', f"{start_time:.6f}",
            '-i', str(self.input_file),
            '-t', f"{duration:.6f}",
            '-acodec', 'libmp3lame',
            '-ab', self.quality,
            '-map_metadata', '0',
            '-id3v2_version', '3',
            '-metadata', f'title={metadata_title}',
            '-metadata', f'track={chapter_num}/{total_chapters}',
            '-y',  # Overwrite output file
            str(output_path)
        ]
        try:
            result = self._run(cmd)
        except Exception as e:
            # Best effort per chapter: one failure must not abort the rest.
            print(f" ✗ Exception during extraction: {e}")
            return False
        if result.returncode == 0:
            print(f" ✓ Saved: {output_filename}")
            return True
        print(f" ✗ Error extracting chapter: {result.stderr}")
        return False

    def extract_all_chapters(self) -> bool:
        """Extract all chapters from the M4B file.

        Returns:
            True only if FFmpeg is available, at least one chapter was found
            and every chapter was extracted successfully.
        """
        print(f"Processing: {self.input_file.name}")
        print(f"Output directory: {self.output_dir}")
        # Check if FFmpeg is available
        if not self.check_ffmpeg():
            print("Error: FFmpeg not found. Please install FFmpeg and ensure it's in your PATH.")
            return False
        # Get chapters
        chapters = self.get_chapters()
        if not chapters:
            print("No chapters found in the M4B file.")
            return False
        total = len(chapters)
        print(f"Found {total} chapters")
        print("-" * 50)
        # Extract each chapter
        success_count = 0
        for i, chapter in enumerate(chapters, 1):
            if self.extract_chapter(chapter, i, total):
                success_count += 1
            print()
        # Summary
        print("-" * 50)
        print(f"Extraction complete: {success_count}/{total} chapters extracted successfully")
        if success_count == total:
            print("All chapters extracted successfully!")
            return True
        print(f"Warning: {total - success_count} chapters failed to extract")
        return False
def main():
    """Command-line entry point.

    Parses the arguments, builds an ``M4BChapterExtractor`` and runs the
    extraction. The process always terminates through ``sys.exit``: status 0
    when every chapter was extracted, status 1 on any failure, invalid input
    or user interruption. The ``-v/--verbose`` flag is accepted but is not
    read anywhere in this function.
    """
    usage_notes = """
Examples:
python m4b_chapter_extractor.py audiobook.m4b -o chapters/
python m4b_chapter_extractor.py audiobook.m4b -o output/ -q 128k
python m4b_chapter_extractor.py audiobook.m4b -o output/ --quality 256k
Requirements:
- FFmpeg must be installed and accessible in PATH
- Input file must be M4B or M4A format
"""
    cli = argparse.ArgumentParser(
        description="Extract chapters from M4B audiobook files as individual MP3 files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )
    cli.add_argument('input_file', help='Path to the input M4B audiobook file')
    cli.add_argument('-o', '--output', required=True,
                     help='Output directory for extracted MP3 chapters')
    cli.add_argument('-q', '--quality', default='192k',
                     help='MP3 audio quality/bitrate (default: 192k). Examples: 128k, 192k, 256k, 320k')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='Enable verbose output')
    options = cli.parse_args()

    # Every outcome is reduced to an exit status; the single sys.exit() below
    # reports it to the shell.
    try:
        splitter = M4BChapterExtractor(
            input_file=options.input_file,
            output_dir=options.output,
            quality=options.quality,
        )
        status = 0 if splitter.extract_all_chapters() else 1
    except (FileNotFoundError, ValueError) as err:
        # Raised by the constructor for a missing or unsupported input file.
        print(f"Error: {err}")
        status = 1
    except KeyboardInterrupt:
        print("\nOperation cancelled by user")
        status = 1
    except Exception as err:
        print(f"Unexpected error: {err}")
        status = 1
    sys.exit(status)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()