Compare commits

39 Commits

Author SHA1 Message Date
sHa
2e5cef4424 Refactor code structure for improved readability and maintainability 2026-01-05 17:11:42 +00:00
sHa
b4b7709f25 bump: update version to 0.8.10 and remove logging info from scan_files method 2026-01-05 15:01:05 +00:00
sHa
8031c97999 Add singleton logging configuration for the renamer application
This commit introduces a new module `logging_config.py` that implements a singleton pattern for logging configuration. The logger is initialized only once and can be configured based on an environment variable to log to a file or to the console. This centralizes logging setup and ensures consistent logging behavior throughout the application.
2026-01-05 14:54:03 +00:00
sHa
ad39632e91 refactor: enhance scan type detection logic and add test case for interlaced 1080i frame class 2026-01-05 08:51:43 +00:00
sHa
ab2f67b780 refactor: standardize text color to olive for media panel properties 2026-01-05 07:53:29 +00:00
sHa
96f1a5b66d Refactor code structure for improved readability and maintainability 2026-01-05 07:44:52 +00:00
sHa
1373b4c5db Refactor code structure for improved readability and maintainability 2026-01-04 21:05:00 +00:00
sHa
5328949d6a Refactor code structure for improved readability and maintainability 2026-01-04 20:58:28 +00:00
sHa
336b030a6b refactor: Consolidate text color decorators and update related properties 2026-01-04 20:49:55 +00:00
sHa
3f8b158135 update bitrate calculation in TrackFormatter 2026-01-04 18:38:54 +00:00
sHa
442bde73e5 feat: Implement poster rendering options with ASCII, Viu, and RichPixels support 2026-01-04 18:35:30 +00:00
sHa
9b353a7e7e feat: Update ToDo list with MKV metadata editing capabilities and bump version to 0.7.10 2026-01-04 17:10:34 +00:00
sHa
0c3a173819 refactor: Remove FormatterApplier class and update related imports and tests 2026-01-04 15:13:48 +00:00
sHa
cffd68c687 fix: Update scan commands and key bindings for improved usability 2026-01-04 14:33:31 +00:00
sHa
e4314f1587 fix: Update conversion service and UI to support MP4 files in conversion process 2026-01-04 13:41:35 +00:00
sHa
13d610b1c3 fix: Update conversion service to support WebM files and improve documentation 2026-01-04 13:15:32 +00:00
sHa
ae44976bcc feat: Add HEVC encoding options and support for MPG/MPEG formats in conversion 2026-01-04 12:33:36 +00:00
sHa
3902dae435 Refactor code structure for improved readability and maintainability 2026-01-03 20:47:17 +00:00
sHa
faeda55dca Refactor code structure for improved readability and maintainability 2026-01-03 20:29:24 +00:00
sHa
5e4ab232ee fix: Improve poster handling in catalog formatting and ensure proper output rendering 2026-01-03 19:44:19 +00:00
sHa
65d9759880 chore: Update version to 0.7.1 and improve poster display using viu 2026-01-03 19:40:24 +00:00
sHa
390e8e8f83 fix convert error, improved tmdb data retrieval 2026-01-03 19:19:47 +00:00
sHa
24f31166d3 remove old releases 2026-01-03 16:40:08 +00:00
sHa
4e9200b8d1 Refactor code structure for improved readability and maintainability 2026-01-03 16:35:18 +00:00
sHa
06cf206c70 feat: Enhance title normalization by replacing dots with spaces and add test case for multi-dot filenames 2026-01-03 16:13:50 +00:00
sHa
0ec1fbe4db feat: Add genre extraction to media properties and icons to tree 2026-01-03 15:34:27 +00:00
sHa
ef1e1e06ca feat: Add delete file functionality with confirmation screen 2026-01-03 15:08:48 +00:00
sHa
b45e629825 Refactor code structure for improved readability and maintainability 2026-01-03 14:54:50 +00:00
sHa
6fee7d9f63 Add ConversionService for AVI to MKV remux with metadata preservation
- Implemented a new service to convert AVI files to MKV format while preserving metadata.
- Added methods for validating AVI files, detecting subtitle files, and mapping audio languages.
- Built ffmpeg command for fast remuxing without re-encoding.
- Included error handling and logging for conversion processes.
2026-01-03 14:29:30 +00:00
sHa
917d25b360 Add decorators for formatting various media attributes
- Introduced `DurationDecorators` for full and short duration formatting.
- Added `ExtensionDecorators` for formatting extension information.
- Created `ResolutionDecorators` for formatting resolution dimensions.
- Implemented `SizeDecorators` for full and short size formatting.
- Enhanced `TextDecorators` with additional formatting options including blue and grey text, URL formatting, and escaping rich markup.
- Developed `TrackDecorators` for formatting video, audio, and subtitle track data.
- Refactored `MediaPanelView` to utilize a new `MediaPanelProperties` class for cleaner property management and formatting.
- Updated `media_panel_properties.py` to include formatted properties for file info, TMDB data, metadata extraction, media info extraction, and filename extraction.
- Bumped version to 0.6.5 in `uv.lock`.
2026-01-03 10:13:17 +00:00
sHa
6bca3c224d Refactor code structure for improved readability and maintainability 2026-01-02 12:45:31 +00:00
sHa
a85ecdb52d fix: Increase width tolerance for frame class matching to accommodate ultrawide aspect ratios 2026-01-02 12:43:55 +00:00
sHa
981000793f Add ProposedFilenameView and MediaPanelView with comprehensive tests
- Implemented ProposedFilenameView to generate standardized filenames using a decorator pattern.
- Created MediaPanelView to assemble media data panels for display, aggregating multiple formatters.
- Added tests for ProposedFilenameView covering various formatting scenarios, including basic, minimal, and special cases.
- Introduced a views package to organize and expose the new views.
- Ensured proper formatting and display of media information, including file info, TMDB data, and track information.
2026-01-02 12:29:04 +00:00
sHa
e64aaf320b chore: Bump version to 0.6.1 and update decorators to use new cache system 2026-01-02 11:01:08 +00:00
sHa
60f32a7e8c refactor: Remove old decorators and integrate caching into the new cache subsystem
- Deleted the `renamer.decorators` package, including `caching.py` and `__init__.py`, to streamline the codebase.
- Updated tests to reflect changes in import paths for caching decorators.
- Added a comprehensive changelog to document major refactoring efforts and future plans.
- Introduced an engineering guide detailing architecture, core components, and development setup.
2026-01-02 08:12:28 +00:00
sHa
7c7e9ab1e1 Refactor code structure and remove redundant code blocks for improved readability and maintainability 2026-01-02 07:14:33 +00:00
sHa
262c0a7b7d Add comprehensive tests for formatter classes, services, and utilities
- Introduced tests for various formatter classes including TextFormatter, DurationFormatter, SizeFormatter, DateFormatter, and more to ensure correct formatting behavior.
- Added tests for service classes such as FileTreeService, MetadataService, and RenameService, covering directory validation, metadata extraction, and file renaming functionalities.
- Implemented utility tests for LanguageCodeExtractor, PatternExtractor, and FrameClassMatcher to validate their extraction and matching capabilities.
- Updated test cases to use datasets for better maintainability and clarity.
- Enhanced error handling tests to ensure robustness against missing or invalid data.
2025-12-31 14:04:33 +00:00
sHa
c5fbd367fc Add rename service and utility modules for file renaming operations
- Implemented RenameService for handling file renaming with features like name validation, proposed name generation, conflict detection, and atomic rename operations.
- Created utility modules for language code extraction, regex pattern matching, and frame class matching to centralize common functionalities.
- Added comprehensive logging for error handling and debugging across all new modules.
2025-12-31 03:13:26 +00:00
sHa
b50b9bc165 feat(cache): Implement unified caching subsystem with decorators, strategies, and management
- Added core caching functionality with `Cache` class supporting in-memory and file-based caching.
- Introduced `CacheManager` for high-level cache operations and statistics.
- Created various cache key generation strategies: `FilepathMethodStrategy`, `APIRequestStrategy`, `SimpleKeyStrategy`, and `CustomStrategy`.
- Developed decorators for easy method caching: `cached`, `cached_method`, `cached_api`, and `cached_property`.
- Implemented type definitions for cache entries and statistics.
- Added comprehensive tests for cache operations, strategies, and decorators to ensure functionality and backward compatibility.
2025-12-31 02:29:10 +00:00
159 changed files with 13390 additions and 3316 deletions

2
.gitignore vendored
View File

@@ -7,3 +7,5 @@ wheels/
*.log *.log
# Virtual environments # Virtual environments
.venv .venv
# Test-generated files
renamer/test/datasets/sample_mediafiles/

View File

@@ -1,199 +0,0 @@
# AI Agent Instructions for Media File Renamer Project
## Project Description
This is a Python Terminal User Interface (TUI) application for managing media files. It uses the Textual library to provide a curses-like interface in the terminal. The app allows users to scan directories for video files, display them in a hierarchical tree view, view detailed metadata information including video, audio, and subtitle tracks, and rename files based on intelligent metadata extraction.
**Current Version**: 0.5.10
Key features:
- Recursive directory scanning with tree navigation
- Dual-mode display: Technical (codec/track details) and Catalog (TMDB metadata with posters)
- Tree-based file navigation with expand/collapse functionality
- Multi-source metadata extraction (MediaInfo, filename parsing, embedded tags, TMDB API)
- Intelligent file renaming with proposed names and confirmation
- Settings management with persistent configuration
- Advanced caching system with TTL (6h extractors, 6h TMDB, 30d posters)
- Terminal poster display using rich-pixels
- Color-coded information display
- Keyboard and mouse navigation
- Multiple UI screens (main app, directory selection, help, rename confirmation, settings)
- Extensible extractor and formatter architecture
- Loading indicators and comprehensive error handling
## Technology Stack
- Python 3.11+
- Textual ≥6.11.0 (TUI framework)
- PyMediaInfo ≥6.0.0 (detailed track information)
- Mutagen ≥1.47.0 (embedded metadata)
- Python-Magic ≥0.4.27 (MIME type detection)
- Langcodes ≥3.5.1 (language code handling)
- Requests ≥2.31.0 (HTTP client for TMDB API)
- Rich-Pixels ≥1.0.0 (terminal image display)
- Pytest ≥7.0.0 (testing framework)
- UV (package manager and build tool)
## Code Structure
- `renamer/main.py`: Main application entry point with argument parsing
- `pyproject.toml`: Project configuration and dependencies (version 0.5.10)
- `README.md`: User documentation
- `DEVELOP.md`: Developer guide with debugging info
- `INSTALL.md`: Installation instructions
- `CLAUDE.md`: Comprehensive AI assistant reference guide
- `ToDo.md`: Development task tracking
- `AI_AGENT.md`: This file (AI agent instructions)
- `renamer/`: Main package
- `app.py`: Main Textual application class with tree management and file operations
- `settings.py`: Settings management with JSON storage
- `cache.py`: File-based caching system with TTL support
- `secrets.py`: API keys and secrets (TMDB)
- `constants.py`: Application constants (media types, sources, resolutions, special editions)
- `screens.py`: Additional UI screens (OpenScreen, HelpScreen, RenameConfirmScreen, SettingsScreen)
- `bump.py`: Version bump utility
- `release.py`: Release automation script
- `extractors/`: Individual extractor classes
- `extractor.py`: MediaExtractor class coordinating all extractors
- `mediainfo_extractor.py`: PyMediaInfo-based extraction
- `filename_extractor.py`: Filename parsing with regex patterns
- `metadata_extractor.py`: Mutagen-based embedded metadata
- `fileinfo_extractor.py`: Basic file information
- `tmdb_extractor.py`: The Movie Database API integration
- `default_extractor.py`: Fallback extractor
- `formatters/`: Data formatting classes
- `formatter.py`: Base formatter interface
- `media_formatter.py`: Main formatter coordinating display
- `catalog_formatter.py`: Catalog mode formatting with TMDB data
- `proposed_name_formatter.py`: Generates rename suggestions
- `track_formatter.py`: Track information formatting
- `size_formatter.py`: File size formatting
- `date_formatter.py`: Timestamp formatting
- `duration_formatter.py`: Duration formatting
- `resolution_formatter.py`: Resolution formatting
- `text_formatter.py`: Text styling utilities
- `extension_formatter.py`: File extension formatting
- `helper_formatter.py`: Helper formatting utilities
- `special_info_formatter.py`: Special edition information
- `decorators/`: Utility decorators
- `caching.py`: Caching decorator for automatic method caching
- `test/`: Unit tests for extractors
- `test_filename_extractor.py`: Filename parsing tests
- `test_mediainfo_extractor.py`: MediaInfo extraction tests
- `test_mediainfo_frame_class.py`: Frame class detection tests
- `test_fileinfo_extractor.py`: File info tests
- `test_metadata_extractor.py`: Metadata extraction tests
- `test_filename_detection.py`: Filename pattern detection tests
- `filenames.txt`, `test_filenames.txt`: Sample test data
- `test_cases.json`, `test_mediainfo_frame_class.json`: Test fixtures
## Instructions for AI Agents
### Coding Standards
- Use type hints where possible
- Follow PEP 8 style guidelines
- Use descriptive variable and function names
- Add docstrings for functions and classes
- Handle exceptions appropriately
- Use pathlib for file operations
### Development Workflow
1. Read the current code and understand the architecture
2. Check the ToDo.md for pending tasks
3. Implement features incrementally
4. Test changes by running the app with `uv run python main.py [directory]`
5. Update tests as needed
6. Ensure backward compatibility
7. Update documentation (README.md, ToDo.md) when adding features
### Key Components
- `RenamerApp`: Main application class inheriting from Textual's App
- Manages the tree view and file operations
- Handles keyboard navigation and commands
- Coordinates metadata extraction and display
- Implements efficient tree updates for renamed files
- `MediaTree`: Custom Tree widget with file-specific styling (inherited from Textual Tree)
- `MediaExtractor`: Coordinates multiple specialized extractors
- `MediaFormatter`: Formats extracted data for TUI display
- Various extractor classes for different data sources
- Various formatter classes for different data types
- Screen classes for different UI states
### Extractor Architecture
Extractors are responsible for gathering raw data from different sources:
- Each extractor inherits from no base class but follows the pattern of `__init__(file_path)` and `extract_*()` methods
- The `MediaExtractor` class coordinates multiple extractors and provides a unified `get()` interface
- Extractors return raw data (strings, numbers, dicts) without formatting
### Formatter Architecture
Formatters are responsible for converting raw data into display strings:
- Each formatter provides static methods like `format_*()`
- The `MediaFormatter` coordinates formatters and applies them based on data types
- `ProposedNameFormatter` generates intelligent rename suggestions
- Formatters handle text styling, color coding, and human-readable representations
### Screen Architecture
The app uses multiple screens for different operations:
- `OpenScreen`: Directory selection with input validation
- `HelpScreen`: Comprehensive help with key bindings
- `RenameConfirmScreen`: File rename confirmation with error handling
### Completed Major Features
- ✅ Settings management with JSON configuration
- ✅ Mode toggle (technical/catalog)
- ✅ Caching system with TTL support
- ✅ TMDB integration for catalog data
- ✅ Poster display in terminal
- ✅ Settings UI screen
### Future Enhancements
- Metadata editing capabilities
- Batch rename operations
- Plugin system for custom extractors/formatters
- Advanced search and filtering
- Undo/redo functionality
- Blue highlighting for changed parts in proposed filename
- Exclude dev commands from distributed package
- Full genre name expansion (currently shows codes)
- Optimized poster quality and display
### Testing
- Run the app with `uv run python main.py [directory]`
- Test navigation, selection, and display
- Verify metadata extraction accuracy
- Test file renaming functionality
- Check for any errors or edge cases
- Run unit tests with `uv run pytest`
### Contribution Guidelines
- Make small, focused changes
- Update documentation as needed
- Ensure the app runs without errors
- Follow the existing code patterns
- Update tests for new functionality
- Update ToDo.md when completing tasks
- Update version numbers appropriately
## Important Files for AI Assistants
For comprehensive project information, AI assistants should refer to:
1. **CLAUDE.md**: Complete AI assistant reference guide (most comprehensive)
2. **AI_AGENT.md**: This file (concise instructions)
3. **DEVELOP.md**: Developer setup and debugging
4. **ToDo.md**: Current task list and completed items
5. **README.md**: User-facing documentation
This document should be updated as the project evolves.
---
**Last Updated**: 2025-12-31

225
CHANGELOG.md Normal file
View File

@@ -0,0 +1,225 @@
# Changelog
All notable changes to the Renamer project are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
---
## [Unreleased]
### Future Plans
See [REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md) and [ToDo.md](ToDo.md) for upcoming features and improvements.
---
## [0.7.0-dev] - 2026-01-01
### Major Refactoring (Phases 1-3)
This development version represents a significant refactoring effort focused on code quality, architecture, and maintainability.
---
### Phase 3: Code Quality (COMPLETED)
#### Added
- **Type Hints**: Complete type coverage for `DefaultExtractor` (21 methods)
- **Mypy Integration**: Added mypy>=1.0.0 as dev dependency for type checking
- **Comprehensive Docstrings**: Added module + class + method docstrings to 5 key files:
- `default_extractor.py` - 22 docstrings
- `extractor.py` - Enhanced with examples
- `fileinfo_extractor.py` - Enhanced with Args/Returns
- `metadata_extractor.py` - Enhanced with examples
- `formatter.py` - Enhanced FormatterApplier
#### Changed
- **Constants Reorganization**: Split monolithic `constants.py` into 8 logical modules:
- `media_constants.py` - Media types
- `source_constants.py` - Video sources
- `frame_constants.py` - Frame classes and quality indicators
- `moviedb_constants.py` - Database identifiers
- `edition_constants.py` - Special editions
- `lang_constants.py` - Skip words for language detection
- `year_constants.py` - Dynamic year validation
- `cyrillic_constants.py` - Character mappings
- **Dynamic Year Validation**: Replaced hardcoded year values with `is_valid_year()` function
- **Language Extraction**: Simplified using `langcodes.Language.get()` for dynamic validation (~80 lines removed)
#### Removed
- **Code Duplication**: Eliminated ~95 lines of duplicated code:
- ~80 lines of hardcoded language lists
- ~15 lines of duplicated movie DB pattern matching
- **Hardcoded Values**: Removed hardcoded quality indicators, year values, Cyrillic mappings
### Phase 2: Architecture Foundation (COMPLETED)
#### Added
- **Base Classes and Protocols** (409 lines):
- `DataExtractor` Protocol defining extractor interface (23 methods)
- `Formatter` ABCs: `DataFormatter`, `TextFormatter`, `MarkupFormatter`, `CompositeFormatter`
- **Service Layer** (935 lines):
- `FileTreeService`: Directory scanning and validation
- `MetadataService`: Thread-pooled metadata extraction with cancellation support
- `RenameService`: Filename validation, sanitization, and atomic renaming
- **Utility Modules** (953 lines):
- `PatternExtractor`: Centralized regex pattern matching
- `LanguageCodeExtractor`: Language code processing
- `FrameClassMatcher`: Resolution/frame class matching
- **Command Palette Integration**:
- `AppCommandProvider`: 8 main app commands
- `CacheCommandProvider`: 7 cache management commands
- Access via Ctrl+P
#### Improved
- **Thread Safety**: MetadataService uses ThreadPoolExecutor with Lock for concurrent operations
- **Testability**: Services can be tested independently of UI
- **Reusability**: Clear interfaces and separation of concerns
### Phase 1: Critical Bug Fixes (COMPLETED)
#### Fixed
- **Cache Key Generation Bug**: Fixed critical variable scoping issue in cache system
- **Resource Leaks**: Fixed file handle leaks in tests (proper context managers)
- **Exception Handling**: Replaced bare `except:` clauses with specific exceptions
#### Added
- **Thread Safety**: Added `threading.RLock` to cache for concurrent access
- **Logging**: Comprehensive logging throughout extractors and formatters:
- Debug: Language code conversions, metadata reads
- Warning: Network failures, API errors, MediaInfo parse failures
- Error: Formatter application failures
#### Changed
- **Unified Cache Subsystem** (500 lines):
- Modular architecture: `core.py`, `types.py`, `strategies.py`, `managers.py`, `decorators.py`
- 4 cache key strategies: `FilepathMethodStrategy`, `APIRequestStrategy`, `SimpleKeyStrategy`, `CustomStrategy`
- Enhanced decorators: `@cached_method()`, `@cached_api()`, `@cached_property()`
- Cache manager operations: `clear_all()`, `clear_by_prefix()`, `clear_expired()`, `compact_cache()`
---
### Phase 5: Test Coverage (PARTIALLY COMPLETED - 4/6)
#### Added
- **Service Tests** (30+ tests): FileTreeService, MetadataService, RenameService
- **Utility Tests** (70+ tests): PatternExtractor, LanguageCodeExtractor, FrameClassMatcher
- **Formatter Tests** (40+ tests): All formatter classes and FormatterApplier
- **Cache Tests** (18 tests): Cache subsystem functionality
- **Dataset Organization**:
- `filename_patterns.json`: 46 comprehensive test cases
- `frame_class_tests.json`: 25 frame class test cases
- Sample file generator: `fill_sample_mediafiles.py`
- Dataset loaders in `conftest.py`
#### Changed
- **Test Organization**: Consolidated test data into `renamer/test/datasets/`
- **Total Tests**: 560 tests (1 skipped), all passing
---
### Documentation Improvements
#### Added
- **ENGINEERING_GUIDE.md**: Comprehensive 900+ line technical reference
- **CHANGELOG.md**: This file
#### Changed
- **CLAUDE.md**: Streamlined to pointer to ENGINEERING_GUIDE.md
- **AI_AGENT.md**: Marked as deprecated, points to ENGINEERING_GUIDE.md
- **DEVELOP.md**: Streamlined with references to ENGINEERING_GUIDE.md
- **README.md**: Streamlined user guide with references
#### Removed
- Outdated version information from documentation files
- Duplicated content now in ENGINEERING_GUIDE.md
---
### Breaking Changes
#### Cache System
- **Cache key format changed**: Old cache files are invalid
- **Migration**: Users should clear cache: `rm -rf ~/.cache/renamer/`
- **Impact**: No data loss, just cache miss on first run after upgrade
#### Dependencies
- **Added**: mypy>=1.0.0 as dev dependency
---
### Statistics
#### Code Quality Metrics
- **Lines Added**: ~3,497 lines
- Phase 1: ~500 lines (cache subsystem)
- Phase 2: ~2,297 lines (base classes + services + utilities)
- Phase 3: ~200 lines (docstrings)
- Phase 5: ~500 lines (new tests)
- **Lines Removed**: ~290 lines through code duplication elimination
- **Net Gain**: ~3,207 lines of quality code
#### Test Coverage
- **Total Tests**: 560 (was 518)
- **New Tests**: +42 tests (+8%)
- **Pass Rate**: 100% (559 passed, 1 skipped)
#### Architecture Improvements
- ✅ Protocols and ABCs for consistent interfaces
- ✅ Service layer with dependency injection
- ✅ Thread pool for concurrent operations
- ✅ Utility modules for shared logic
- ✅ Command palette for unified access
- ✅ Type hints and mypy integration
- ✅ Comprehensive docstrings
---
## [0.6.0] - 2025-12-31
### Added
- Initial cache subsystem implementation
- Basic service layer structure
- Protocol definitions for extractors
### Changed
- Refactored cache key generation
- Improved error handling
---
## [0.5.10] - Previous Release
### Features
- Dual display modes (technical/catalog)
- TMDB integration with poster display
- Settings configuration UI
- Persistent caching with TTL
- Intelligent file renaming
- Color-coded information display
- Keyboard and mouse navigation
- Help screen with key bindings
---
## Version History Summary
- **0.7.0-dev** (2026-01-01): Major refactoring - code quality, architecture, testing
- **0.6.0** (2025-12-31): Cache improvements, service layer foundation
- **0.5.x**: Settings, caching, catalog mode, poster display
- **0.4.x**: TMDB integration
- **0.3.x**: Enhanced extractors and formatters
- **0.2.x**: Initial TUI with basic metadata
---
## Links
- [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md) - Complete technical documentation
- [REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md) - Future refactoring plans
- [ToDo.md](ToDo.md) - Current task list
---
**Last Updated**: 2026-01-01

451
CLAUDE.md
View File

@@ -1,441 +1,38 @@
# CLAUDE.md - AI Assistant Reference Guide # CLAUDE.md - AI Assistant Reference
This document provides comprehensive project information for AI assistants (like Claude) working on the Renamer project. **Version**: 0.7.0-dev
**Last Updated**: 2026-01-01
## Project Overview > **📘 All technical documentation has been moved to [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)**
**Renamer** is a sophisticated Terminal User Interface (TUI) application for managing, viewing metadata, and renaming media files. Built with Python and the Textual framework, it provides an interactive, curses-like interface for media collection management. ## For AI Assistants
### Current Version Please read **[ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)** for complete project documentation including:
- **Version**: 0.5.10
- **Python**: 3.11+
- **Status**: Active development with media catalog mode features
## Project Purpose - Architecture overview
- Core components
- Development setup
- Testing strategy
- Code standards
- AI assistant instructions
- Release process
Renamer serves two primary use cases: ## Quick Commands
1. **Technical Mode**: Detailed technical metadata viewing (video tracks, audio streams, codecs, bitrates)
2. **Catalog Mode**: Media library catalog view with TMDB integration (posters, ratings, descriptions, genres)
## Architecture Overview
### Core Components
#### Main Application (`renamer/app.py`)
- Main `RenamerApp` class inheriting from Textual's `App`
- Manages TUI layout with split view: file tree (left) and details panel (right)
- Handles keyboard/mouse navigation and user commands
- Coordinates file operations and metadata extraction
- Implements efficient tree updates for file renaming
#### Entry Point (`renamer/main.py`)
- Argument parsing for directory selection
- Application initialization and launch
#### Constants (`renamer/constants.py`)
Defines comprehensive dictionaries:
- `MEDIA_TYPES`: Supported video formats (mkv, avi, mov, mp4, etc.)
- `SOURCE_DICT`: Video source types (WEB-DL, BDRip, BluRay, etc.)
- `FRAME_CLASSES`: Resolution classifications (480p-8K)
- `MOVIE_DB_DICT`: Database identifiers (TMDB, IMDB, Trakt, TVDB)
- `SPECIAL_EDITIONS`: Edition types (Director's Cut, Extended, etc.)
### Extractor System (`renamer/extractors/`)
Modular architecture for gathering metadata from multiple sources:
#### Core Extractors
1. **MediaInfoExtractor** (`mediainfo_extractor.py`)
- Uses PyMediaInfo library
- Extracts detailed track information (video, audio, subtitle)
- Provides codec, bitrate, frame rate, resolution data
2. **FilenameExtractor** (`filename_extractor.py`)
- Parses metadata from filename patterns
- Detects year, resolution, source, codecs, edition info
- Uses regex patterns to extract structured data
3. **MetadataExtractor** (`metadata_extractor.py`)
- Reads embedded metadata using Mutagen
- Extracts tags, container format info
4. **FileInfoExtractor** (`fileinfo_extractor.py`)
- Basic file information (size, dates, permissions)
- MIME type detection via python-magic
5. **TMDBExtractor** (`tmdb_extractor.py`)
- The Movie Database API integration
- Fetches title, year, ratings, overview, genres, poster
- Supports movie and TV show data
6. **DefaultExtractor** (`default_extractor.py`)
- Fallback extractor providing minimal data
#### Extractor Coordinator (`extractor.py`)
- `MediaExtractor` class orchestrates all extractors
- Provides unified `get()` interface for data retrieval
- Caching support via decorators
### Formatter System (`renamer/formatters/`)
Transforms raw extracted data into formatted display strings:
#### Specialized Formatters
1. **MediaFormatter** (`media_formatter.py`)
- Main formatter coordinating all format operations
- Mode-aware (technical vs catalog)
- Applies color coding and styling
2. **CatalogFormatter** (`catalog_formatter.py`)
- Formats catalog mode display
- Renders TMDB data, ratings, genres, overview
- Terminal image display for posters (rich-pixels)
3. **TrackFormatter** (`track_formatter.py`)
- Video/audio/subtitle track formatting
- Color-coded track information
4. **ProposedNameFormatter** (`proposed_name_formatter.py`)
- Generates intelligent rename suggestions
- Pattern: `Title (Year) [Resolution Source Edition].ext`
- Sanitizes filenames (removes invalid characters)
5. **Utility Formatters**
- `SizeFormatter`: Human-readable file sizes
- `DateFormatter`: Timestamp formatting
- `DurationFormatter`: Duration in HH:MM:SS
- `ResolutionFormatter`: Resolution display
- `TextFormatter`: Text styling utilities
- `ExtensionFormatter`: File extension handling
- `SpecialInfoFormatter`: Edition/source formatting
- `HelperFormatter`: General formatting helpers
### Settings & Caching
#### Settings System (`renamer/settings.py`)
- JSON configuration stored in `~/.config/renamer/config.json`
- Configurable options:
- `mode`: "technical" or "catalog"
- `cache_ttl_extractors`: 21600s (6 hours)
- `cache_ttl_tmdb`: 21600s (6 hours)
- `cache_ttl_posters`: 2592000s (30 days)
- Automatic save/load with defaults
#### Cache System (`renamer/cache.py`)
- File-based cache with TTL support
- Location: `~/.cache/renamer/`
- Subdirectory organization (tmdb/, posters/, extractors/, general/)
- Supports JSON and pickle serialization
- In-memory cache for performance
- Image caching for TMDB posters
- Automatic expiration and cleanup
#### Caching Decorators (`renamer/decorators/caching.py`)
- `@cached` decorator for automatic method caching
- Integrates with Settings for TTL configuration
### UI Screens (`renamer/screens.py`)
Additional UI screens for user interaction:
1. **OpenScreen**: Directory selection dialog with validation
2. **HelpScreen**: Comprehensive help with key bindings
3. **RenameConfirmScreen**: File rename confirmation with error handling
4. **SettingsScreen**: Settings configuration interface
### Development Tools
#### Version Management (`renamer/bump.py`)
- `bump-version` command
- Auto-increments patch version in `pyproject.toml`
#### Release Automation (`renamer/release.py`)
- `release` command
- Runs: version bump → dependency sync → package build
## Key Features
### Current Features (v0.5.10)
- Recursive directory scanning for video files
- Tree view with expand/collapse navigation
- Dual-mode display (technical/catalog)
- Detailed metadata extraction from multiple sources
- Intelligent file renaming with preview
- TMDB integration with poster display
- Settings configuration UI
- Persistent caching with TTL
- Loading indicators and error handling
- Confirmation dialogs for file operations
- Color-coded information display
- Keyboard and mouse navigation
### Keyboard Commands
- `q`: Quit application
- `o`: Open directory
- `s`: Scan/rescan directory
- `f`: Refresh metadata for selected file
- `r`: Rename file with proposed name
- `p`: Toggle tree expansion
- `h`: Show help screen
- `^p`: Open command palette
- Settings menu via action bar
## Technology Stack
### Core Dependencies
- **textual** (≥6.11.0): TUI framework
- **pymediainfo** (≥6.0.0): Media track analysis
- **mutagen** (≥1.47.0): Embedded metadata
- **python-magic** (≥0.4.27): MIME detection
- **langcodes** (≥3.5.1): Language code handling
- **requests** (≥2.31.0): HTTP for TMDB API
- **rich-pixels** (≥1.0.0): Terminal image display
- **pytest** (≥7.0.0): Testing framework
### System Requirements
- Python 3.11 or higher
- UV package manager (recommended)
- MediaInfo library (system dependency for pymediainfo)
## Development Workflow
### Setup
```bash ```bash
# Install UV uv sync --extra dev # Setup
curl -LsSf https://astral.sh/uv/install.sh | sh uv run pytest # Test
uv run renamer [dir] # Run
# Clone and sync
cd /path/to/renamer
uv sync
# Run from source
uv run python renamer/main.py [directory]
``` ```
### Development Commands ## Essential Principles
```bash
uv run renamer # Run installed version
uv run pytest # Run tests
uv run bump-version # Increment version
uv run release # Build release (bump + sync + build)
uv build # Build wheel/tarball
uv tool install . # Install as global tool
```
### Debugging 1. **Read [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md) first**
```bash 2. Read files before modifying
# Enable formatter logging 3. Test everything (`uv run pytest`)
FORMATTER_LOG=1 uv run renamer /path/to/directory 4. Follow existing patterns
# Creates formatter.log with detailed call traces 5. Keep solutions simple
```
### Testing
- Test files in `renamer/test/`
- Sample filenames in `test/filenames.txt` and `test/test_filenames.txt`
- Test cases in `test/test_cases.json`
- Run with: `uv run pytest`
## Code Style & Standards
### Python Standards
- Type hints encouraged
- PEP 8 style guidelines
- Descriptive variable/function names
- Docstrings for classes and functions
- Pathlib for file operations
- Proper exception handling
### Architecture Patterns
- Extractor pattern: Each extractor focuses on one data source
- Formatter pattern: Formatters handle display logic, extractors handle data
- Separation of concerns: Data extraction → formatting → display
- Dependency injection: Extractors and formatters are modular
- Configuration management: Settings class for all config
### Best Practices
- Avoid over-engineering (keep solutions simple)
- Only add features when explicitly requested
- Validate at system boundaries only (user input, external APIs)
- Don't add unnecessary error handling for internal code
- Trust framework guarantees
- Delete unused code completely (no backwards-compat hacks)
## File Operations
### Directory Scanning
- Recursive search for supported video formats
- File tree representation with hierarchical structure
- Efficient tree updates on file operations
### File Renaming
1. Select file in tree
2. Press `r` to initiate rename
3. Review proposed name (shows current vs proposed)
4. Confirm with `y` or cancel with `n`
5. Tree updates in-place without full reload
### Metadata Caching
- First extraction cached for 6 hours
- TMDB data cached for 6 hours
- Posters cached for 30 days
- Force refresh with `f` command
- Cache invalidated on file rename
## API Integration
### TMDB API
- API key stored in `renamer/secrets.py`
- Search endpoint for movie lookup by title/year
- Image base URL for poster downloads
- Handles rate limiting and errors gracefully
- Falls back to filename data if API unavailable
## Project Files
### Documentation
- `README.md`: User-facing documentation
- `AI_AGENT.md`: AI agent instructions (legacy)
- `DEVELOP.md`: Developer guide
- `INSTALL.md`: Installation instructions
- `ToDo.md`: Task tracking
- `CLAUDE.md`: This file (AI assistant reference)
### Configuration
- `pyproject.toml`: Project metadata, dependencies, build config
- `uv.lock`: Locked dependencies
### Build Artifacts
- `dist/`: Built wheels and tarballs
- `build/`: Build intermediates
- `renamer.egg-info/`: Package metadata
## Known Issues & Limitations
### Current Limitations
- TMDB API requires internet connection
- Poster display requires terminal with image support
- Some special characters in filenames need sanitization
- Large directories may have initial scan delay
### Future Enhancements (See ToDo.md)
- Metadata editing capabilities
- Batch rename operations
- Advanced search and filtering
- Undo/redo functionality
- Plugin system for custom extractors/formatters
- Full genre name expansion (currently shows codes)
- Improved poster quality/display optimization
## Contributing Guidelines
### Making Changes
1. Read existing code and understand architecture
2. Check `ToDo.md` for pending tasks
3. Implement features incrementally
4. Test with real media files
5. Ensure backward compatibility
6. Update documentation
7. Update tests as needed
8. Run `uv run release` before committing
### Commit Standards
- Clear, descriptive commit messages
- Focus on "why" not "what"
- One logical change per commit
- Reference related issues/tasks
### Code Review Checklist
- [ ] Follows PEP 8 style
- [ ] Type hints added where appropriate
- [ ] No unnecessary complexity
- [ ] Tests pass (`uv run pytest`)
- [ ] Documentation updated
- [ ] No security vulnerabilities (XSS, injection, etc.)
- [ ] Efficient resource usage (no memory leaks)
## Security Considerations
- Input sanitization for filenames (see `ProposedNameFormatter`)
- No shell command injection risks
- Safe file operations (pathlib, proper error handling)
- TMDB API key should not be committed (stored in `secrets.py`)
- Cache directory permissions should be user-only
## Performance Notes
- In-memory cache reduces repeated extraction overhead
- File cache persists across sessions
- Tree updates optimized for rename operations
- TMDB requests throttled to respect API limits
- Large directory scans use async/await patterns
## Special Notes for AI Assistants
### When Adding Features
1. **Always read relevant files first** - Never modify code you haven't read
2. **Check ToDo.md** - See if feature is already planned
3. **Understand existing patterns** - Follow established architecture
4. **Test with real files** - Use actual media files for testing
5. **Update documentation** - Keep docs in sync with code
### When Debugging
1. **Enable formatter logging** - Use `FORMATTER_LOG=1` for detailed traces
2. **Check cache state** - Clear cache if stale data suspected
3. **Verify file permissions** - Ensure read/write access
4. **Test with sample filenames** - Use test fixtures first
### When Refactoring
1. **Maintain backward compatibility** - Unless explicitly breaking change
2. **Update tests** - Reflect refactored code
3. **Check all formatters** - Formatting is centralized
4. **Verify extractor chain** - Ensure data flow intact
### Common Pitfalls to Avoid
- Don't create new files unless absolutely necessary (edit existing)
- Don't add features beyond what's requested
- Don't over-engineer solutions
- Don't skip testing with real files
- Don't forget to update version number for releases
- Don't commit secrets or API keys
- Don't use deprecated Textual APIs
## Project History
### Evolution
- Started as simple file renamer
- Added metadata extraction (MediaInfo, Mutagen)
- Expanded to TUI with Textual framework
- Added filename parsing intelligence
- Integrated TMDB for catalog mode
- Added settings and caching system
- Implemented poster display with rich-pixels
- Added dual-mode interface (technical/catalog)
### Version Milestones
- 0.2.x: Initial TUI with basic metadata
- 0.3.x: Enhanced extractors and formatters
- 0.4.x: Added TMDB integration
- 0.5.x: Settings, caching, catalog mode, poster display
## Resources
### External Documentation
- [Textual Documentation](https://textual.textualize.io/)
- [PyMediaInfo Documentation](https://pymediainfo.readthedocs.io/)
- [Mutagen Documentation](https://mutagen.readthedocs.io/)
- [TMDB API Documentation](https://developers.themoviedb.org/3)
- [UV Documentation](https://docs.astral.sh/uv/)
### Internal Documentation
- Main README: User guide and quick start
- DEVELOP.md: Developer setup and debugging
- INSTALL.md: Installation methods
- AI_AGENT.md: Legacy AI instructions (historical)
- ToDo.md: Current task list
--- ---
**Last Updated**: 2025-12-31 **Full Documentation**: [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)
**For AI Assistant**: Claude (Anthropic)
**Project Maintainer**: sha
**Repository**: `/home/sha/bin/renamer`

View File

@@ -1,220 +1,118 @@
# Developer Guide # Developer Guide
This guide contains information for developers working on the Renamer project. **Version**: 0.7.0-dev
**Last Updated**: 2026-01-01
**Current Version**: 0.5.10 > **📘 For complete development documentation, see [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)**
## Development Setup Quick reference for developers working on the Renamer project.
### Prerequisites
- Python 3.11+
- UV package manager
### Install UV (if not already installed)
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
### Development Installation
```bash
# Clone the repository
git clone <repository-url>
cd renamer
# Install in development mode with all dependencies
uv sync
# Install the package in editable mode
uv pip install -e .
```
### Running in Development
```bash
# Run directly from source
uv run python renamer/main.py
# Or run with specific directory
uv run python renamer/main.py /path/to/directory
# Or use the installed command
uv run renamer
```
## Development Commands
The project includes several development commands defined in `pyproject.toml`:
### bump-version
Increments the patch version in `pyproject.toml` (e.g., 0.2.6 → 0.2.7).
```bash
uv run bump-version
```
### release
Runs a batch process: bump version, sync dependencies, and build the package.
```bash
uv run release
```
### Other Commands
- `uv sync`: Install/update dependencies
- `uv build`: Build the package
- `uv run pytest`: Run tests
## Debugging
### Formatter Logging
Enable detailed logging for formatter operations:
```bash
FORMATTER_LOG=1 uv run renamer /path/to/directory
```
This creates `formatter.log` in the current directory with:
- Formatter call sequences and ordering
- Input/output values for each formatter
- Caller information (file and line number)
- Any errors during formatting
- Timestamp for each operation
### Cache Inspection
Cache is stored in `~/.cache/renamer/` with subdirectories:
- `extractors/`: Extractor results cache
- `tmdb/`: TMDB API response cache
- `posters/`: Downloaded poster images
- `general/`: General purpose cache
To clear cache:
```bash
rm -rf ~/.cache/renamer/
```
### Settings Location
Settings are stored in `~/.config/renamer/config.json`:
```json
{
"mode": "technical",
"cache_ttl_extractors": 21600,
"cache_ttl_tmdb": 21600,
"cache_ttl_posters": 2592000
}
```
## Architecture
The application uses a modular architecture with clear separation of concerns:
### Core Application (`renamer/`)
- **app.py**: Main RenamerApp class (Textual App), tree management, file operations
- **main.py**: Entry point with argument parsing
- **constants.py**: Comprehensive constants (media types, sources, resolutions, editions)
- **settings.py**: Settings management with JSON persistence (`~/.config/renamer/`)
- **cache.py**: File-based caching system with TTL support (`~/.cache/renamer/`)
- **secrets.py**: API keys and secrets (TMDB)
### Extractors (`renamer/extractors/`)
Data extraction from multiple sources:
- **extractor.py**: MediaExtractor coordinator class
- **mediainfo_extractor.py**: PyMediaInfo for detailed track information
- **filename_extractor.py**: Regex-based filename parsing
- **metadata_extractor.py**: Mutagen for embedded metadata
- **fileinfo_extractor.py**: Basic file information (size, dates, MIME)
- **tmdb_extractor.py**: The Movie Database API integration
- **default_extractor.py**: Fallback extractor
### Formatters (`renamer/formatters/`)
Display formatting and rendering:
- **formatter.py**: Base formatter interface
- **media_formatter.py**: Main formatter coordinating all format operations
- **catalog_formatter.py**: Catalog mode display (TMDB data, posters)
- **proposed_name_formatter.py**: Intelligent rename suggestions
- **track_formatter.py**: Video/audio/subtitle track formatting
- **size_formatter.py**: Human-readable file sizes
- **date_formatter.py**: Timestamp formatting
- **duration_formatter.py**: Duration in HH:MM:SS format
- **resolution_formatter.py**: Resolution display
- **extension_formatter.py**: File extension handling
- **special_info_formatter.py**: Edition/source formatting
- **text_formatter.py**: Text styling utilities
- **helper_formatter.py**: General formatting helpers
### Screens (`renamer/screens.py`)
UI screens for user interaction:
- **OpenScreen**: Directory selection with validation
- **HelpScreen**: Comprehensive help with key bindings
- **RenameConfirmScreen**: File rename confirmation with preview
- **SettingsScreen**: Settings configuration UI
### Utilities
- **decorators/caching.py**: Caching decorator for automatic method caching
- **bump.py**: Version bump utility script
- **release.py**: Release automation (bump + sync + build)
## Testing
Run tests with:
```bash
uv run pytest
```
Test files are located in `renamer/test/` with sample filenames in `filenames.txt`.
## Building and Distribution
### Build the Package
```bash
uv build
```
### Install as Tool
```bash
uv tool install .
```
### Uninstall
```bash
uv tool uninstall renamer
```
## Code Style
The project follows Python best practices:
- **PEP 8**: Standard Python style guide
- **Type Hints**: Encouraged where appropriate
- **Docstrings**: For all classes and public methods
- **Descriptive Naming**: Clear variable and function names
- **Pathlib**: For all file operations
- **Error Handling**: Appropriate exception handling at boundaries
Consider using tools like:
- `ruff` for linting and formatting
- `mypy` for type checking
- `black` for consistent formatting
## Contributing
1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Run tests: `uv run pytest`
5. Run the release process: `uv run release`
6. Submit a pull request
## Additional Documentation
For comprehensive project information:
- **[README.md](README.md)**: User guide and features
- **[CLAUDE.md](CLAUDE.md)**: Complete AI assistant reference
- **[AI_AGENT.md](AI_AGENT.md)**: AI agent instructions
- **[INSTALL.md](INSTALL.md)**: Installation methods
- **[ToDo.md](ToDo.md)**: Task list and priorities
## Project Resources
- **Cache Directory**: `~/.cache/renamer/`
- **Config Directory**: `~/.config/renamer/`
- **Test Files**: `renamer/test/`
- **Build Output**: `dist/` and `build/`
--- ---
**Last Updated**: 2025-12-31 ## Quick Setup
```bash
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
# Clone and setup
cd /home/sha/bin/renamer
uv sync --extra dev
```
---
## Essential Commands
```bash
# Run from source
uv run renamer [directory]
# Run tests
uv run pytest
# Run with coverage
uv run pytest --cov=renamer
# Type check
uv run mypy renamer/
# Version bump
uv run bump-version
# Full release
uv run release
# Build distribution
uv build
```
---
## Debugging
```bash
# Enable detailed logging
FORMATTER_LOG=1 uv run renamer /path/to/directory
# Check logs
cat formatter.log
# Clear cache
rm -rf ~/.cache/renamer/
```
---
## Testing
```bash
# All tests
uv run pytest
# Specific file
uv run pytest renamer/test/test_services.py
# Verbose
uv run pytest -xvs
# Generate sample files
uv run python renamer/test/fill_sample_mediafiles.py
```
See [ENGINEERING_GUIDE.md - Testing Strategy](ENGINEERING_GUIDE.md#testing-strategy)
---
## Release Process
```bash
# 1. Bump version
uv run bump-version
# 2. Run full release
uv run release
# 3. Test installation
uv tool install .
# 4. Manual testing
uv run renamer /path/to/test/media
```
See [ENGINEERING_GUIDE.md - Release Process](ENGINEERING_GUIDE.md#release-process)
---
## Documentation
- **[ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)** - Complete technical reference
- **[README.md](README.md)** - User guide
- **[INSTALL.md](INSTALL.md)** - Installation instructions
- **[CHANGELOG.md](CHANGELOG.md)** - Version history
- **[REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md)** - Future plans
- **[ToDo.md](ToDo.md)** - Current tasks
---
**For complete documentation, see [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)**

944
ENGINEERING_GUIDE.md Normal file
View File

@@ -0,0 +1,944 @@
# Renamer Engineering Guide
**Version**: 0.7.0-dev
**Last Updated**: 2026-01-01
**Python**: 3.11+
**Status**: Active Development
This is the comprehensive technical reference for the Renamer project. It contains all architectural information, implementation details, development workflows, and AI assistant instructions.
---
## Table of Contents
1. [Project Overview](#project-overview)
2. [Architecture](#architecture)
3. [Core Components](#core-components)
4. [Development Setup](#development-setup)
5. [Testing Strategy](#testing-strategy)
6. [Code Standards](#code-standards)
7. [AI Assistant Instructions](#ai-assistant-instructions)
8. [Release Process](#release-process)
---
## Project Overview
### Purpose
Renamer is a sophisticated Terminal User Interface (TUI) application for managing, viewing metadata, and renaming media files. Built with Python and the Textual framework.
**Dual-Mode Operation**:
- **Technical Mode**: Detailed technical metadata (video tracks, audio streams, codecs, bitrates)
- **Catalog Mode**: Media library catalog view with TMDB integration (posters, ratings, descriptions)
### Current Version
- **Version**: 0.7.0-dev (in development)
- **Python**: 3.11+
- **License**: Not specified
- **Repository**: `/home/sha/bin/renamer`
### Technology Stack
#### Core Dependencies
- **textual** (≥6.11.0): TUI framework
- **pymediainfo** (≥6.0.0): Media track analysis
- **mutagen** (≥1.47.0): Embedded metadata
- **python-magic** (≥0.4.27): MIME detection
- **langcodes** (≥3.5.1): Language code handling
- **requests** (≥2.31.0): HTTP for TMDB API
- **rich-pixels** (≥1.0.0): Terminal image display
- **pytest** (≥7.0.0): Testing framework
#### Dev Dependencies
- **mypy** (≥1.0.0): Type checking
#### System Requirements
- Python 3.11 or higher
- UV package manager (recommended)
- MediaInfo library (system dependency)
---
## Architecture
### Architectural Layers
```
┌─────────────────────────────────────────┐
│ TUI Layer (Textual) │
│ app.py, screens.py │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
│ Service Layer │
│ FileTreeService, MetadataService, │
│ RenameService │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
│ Extractor Layer │
│ MediaExtractor coordinates: │
│ - FilenameExtractor │
│ - MediaInfoExtractor │
│ - MetadataExtractor │
│ - FileInfoExtractor │
│ - TMDBExtractor │
│ - DefaultExtractor │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
│ Formatter Layer │
│ FormatterApplier coordinates: │
│ - DataFormatters (size, duration) │
│ - TextFormatters (case, style) │
│ - MarkupFormatters (colors, bold) │
└─────────────────────────────────────────┘
┌─────────────────────────────────────────┐
│ Utility & Cache Layer │
│ - PatternExtractor │
│ - LanguageCodeExtractor │
│ - FrameClassMatcher │
│ - Unified Cache Subsystem │
└─────────────────────────────────────────┘
```
### Design Patterns
1. **Protocol-Based Architecture**: `DataExtractor` Protocol defines extractor interface
2. **Coordinator Pattern**: `MediaExtractor` coordinates multiple extractors with priority system
3. **Strategy Pattern**: Cache key strategies for different data types
4. **Decorator Pattern**: `@cached_method()` for method-level caching
5. **Service Layer**: Business logic separated from UI
6. **Dependency Injection**: Services receive extractors/formatters as dependencies
---
## Core Components
### 1. Main Application (`renamer/app.py`)
**Class**: `RenamerApp(App)`
**Responsibilities**:
- TUI layout management (split view: file tree + details panel)
- Keyboard/mouse navigation
- Command palette integration (Ctrl+P)
- File operation coordination
- Efficient tree updates
**Key Features**:
- Two command providers: `AppCommandProvider`, `CacheCommandProvider`
- Dual-mode support (technical/catalog)
- Real-time metadata display
### 2. Service Layer (`renamer/services/`)
#### FileTreeService (`file_tree_service.py`)
- Directory scanning and validation
- Recursive tree building with filtering
- Media file detection (based on `MEDIA_TYPES`)
- Permission error handling
- Tree node searching by path
- Directory statistics
#### MetadataService (`metadata_service.py`)
- **Thread pool management** (ThreadPoolExecutor, configurable workers)
- **Thread-safe operations** with Lock
- Concurrent metadata extraction
- **Active extraction tracking** and cancellation
- Cache integration via decorators
- Synchronous and asynchronous modes
- Formatter coordination
- Error handling with callbacks
- Context manager support
#### RenameService (`rename_service.py`)
- Proposed name generation from metadata
- Filename validation and sanitization
- Invalid character removal (cross-platform)
- Reserved name checking (Windows compatibility)
- File conflict detection
- Atomic rename operations
- Dry-run mode
- Callback-based rename with success/error handlers
- Markup tag stripping
### 3. Extractor System (`renamer/extractors/`)
#### Base Protocol (`base.py`)
```python
class DataExtractor(Protocol):
"""Defines standard interface for all extractors"""
def extract_title(self) -> Optional[str]: ...
def extract_year(self) -> Optional[str]: ...
# ... 21 methods total
```
#### MediaExtractor (`extractor.py`)
**Coordinator class** managing priority-based extraction:
**Priority Order Examples**:
- Title: TMDB → Metadata → Filename → Default
- Year: Filename → Default
- Technical info: MediaInfo → Default
- File info: FileInfo → Default
**Usage**:
```python
extractor = MediaExtractor(Path("movie.mkv"))
title = extractor.get("title") # Tries sources in priority order
year = extractor.get("year", source="Filename") # Force specific source
```
#### Specialized Extractors
1. **FilenameExtractor** (`filename_extractor.py`)
- Parses metadata from filename patterns
- Detects year, resolution, source, codecs, edition
- Uses regex patterns and utility classes
- Handles Cyrillic normalization
- Extracts language codes with counts (e.g., "2xUKR_ENG")
2. **MediaInfoExtractor** (`mediainfo_extractor.py`)
- Uses PyMediaInfo library
- Extracts detailed track information
- Provides codec, bitrate, frame rate, resolution
- Frame class matching with tolerances
3. **MetadataExtractor** (`metadata_extractor.py`)
- Uses Mutagen library for embedded tags
- Extracts title, artist, duration
- Falls back to MIME type detection
- Handles multiple container formats
4. **FileInfoExtractor** (`fileinfo_extractor.py`)
- Basic file system information
- Size, modification time, paths
- Extension extraction
- Fast, no external dependencies
5. **TMDBExtractor** (`tmdb_extractor.py`)
- The Movie Database API integration
- Fetches title, year, ratings, overview, genres
- Downloads and caches posters
- Supports movies and TV shows
- Rate limiting and error handling
6. **DefaultExtractor** (`default_extractor.py`)
- Fallback extractor providing default values
- Returns None or empty collections
- Safe final fallback in extractor chain
### 4. Formatter System (`renamer/formatters/`)
#### Base Classes (`base.py`)
- `Formatter`: Base ABC with abstract `format()` method
- `DataFormatter`: For data transformations (sizes, durations, dates)
- `TextFormatter`: For text transformations (case changes)
- `MarkupFormatter`: For visual styling (colors, bold, links)
- `CompositeFormatter`: For chaining multiple formatters
#### FormatterApplier (`formatter.py`)
**Coordinator** ensuring correct formatter order:
**Order**: Data → Text → Markup
**Global Ordering**:
1. Data formatters (size, duration, date, track info)
2. Text formatters (uppercase, lowercase, camelcase)
3. Markup formatters (bold, colors, dim, underline)
**Usage**:
```python
formatters = [SizeFormatter.format_size, TextFormatter.bold]
result = FormatterApplier.apply_formatters(1024, formatters)
# Result: bold("1.00 KB")
```
#### Specialized Formatters
- **MediaFormatter**: Main coordinator, mode-aware (technical/catalog)
- **CatalogFormatter**: TMDB data, ratings, genres, poster display
- **TrackFormatter**: Video/audio/subtitle track formatting with colors
- **ProposedNameFormatter**: Intelligent rename suggestions
- **SizeFormatter**: Human-readable file sizes
- **DurationFormatter**: Duration in HH:MM:SS
- **DateFormatter**: Timestamp formatting
- **ResolutionFormatter**: Resolution display
- **ExtensionFormatter**: File extension handling
- **SpecialInfoFormatter**: Edition/source formatting
- **TextFormatter**: Text styling utilities
### 5. Utility Modules (`renamer/utils/`)
#### PatternExtractor (`pattern_utils.py`)
**Centralized regex pattern matching**:
- Movie database ID extraction (TMDB, IMDB, Trakt, TVDB)
- Year extraction and validation
- Quality indicator detection
- Source indicator detection
- Bracketed content manipulation
- Position finding for year/quality/source
**Example**:
```python
extractor = PatternExtractor()
db_info = extractor.extract_movie_db_ids("[tmdbid-12345]")
# Returns: {'type': 'tmdb', 'id': '12345'}
```
#### LanguageCodeExtractor (`language_utils.py`)
**Language code processing**:
- Extract from brackets: `[UKR_ENG]``['ukr', 'eng']`
- Extract standalone codes from filename
- Handle count patterns: `[2xUKR_ENG]`
- Convert to ISO 639-3 codes
- Skip quality indicators and file extensions
- Format as language counts: `"2ukr,eng"`
**Example**:
```python
extractor = LanguageCodeExtractor()
langs = extractor.extract_from_brackets("[2xUKR_ENG]")
# Returns: ['ukr', 'ukr', 'eng']
```
#### FrameClassMatcher (`frame_utils.py`)
**Resolution/frame class matching**:
- Multi-step matching algorithm
- Height and width tolerance
- Aspect ratio calculation
- Scan type detection (progressive/interlaced)
- Standard resolution checking
- Nominal height/typical widths lookup
**Matching Strategy**:
1. Exact height + width match
2. Height match with aspect ratio validation
3. Closest height match
4. Non-standard quality indicator detection
### 6. Constants (`renamer/constants/`)
**Modular organization** (8 files):
1. **media_constants.py**: `MEDIA_TYPES` - Supported video formats
2. **source_constants.py**: `SOURCE_DICT` - Video source types
3. **frame_constants.py**: `FRAME_CLASSES`, `NON_STANDARD_QUALITY_INDICATORS`
4. **moviedb_constants.py**: `MOVIE_DB_DICT` - Database identifiers
5. **edition_constants.py**: `SPECIAL_EDITIONS` - Edition types
6. **lang_constants.py**: `SKIP_WORDS` - Words to skip in language detection
7. **year_constants.py**: `is_valid_year()`, dynamic year validation
8. **cyrillic_constants.py**: `CYRILLIC_TO_ENGLISH` - Character mappings
**Backward Compatibility**: All constants exported via `__init__.py`
### 7. Cache Subsystem (`renamer/cache/`)
**Unified, modular architecture**:
```
renamer/cache/
├── __init__.py # Exports and convenience functions
├── core.py # Core Cache class (thread-safe with RLock)
├── types.py # CacheEntry, CacheStats TypedDicts
├── strategies.py # Cache key generation strategies
├── managers.py # CacheManager for operations
└── decorators.py # Enhanced cache decorators
```
#### Cache Key Strategies
- `FilepathMethodStrategy`: For extractor methods
- `APIRequestStrategy`: For API responses
- `SimpleKeyStrategy`: For simple prefix+id patterns
- `CustomStrategy`: User-defined key generation
#### Cache Decorators
```python
@cached_method(ttl=3600) # Method caching
def extract_title(self):
...
@cached_api(service="tmdb", ttl=21600) # API caching
def fetch_movie_data(self, movie_id):
...
```
#### Cache Manager Operations
- `clear_all()`: Remove all cache entries
- `clear_by_prefix(prefix)`: Clear specific cache type
- `clear_expired()`: Remove expired entries
- `get_stats()`: Comprehensive statistics
- `clear_file_cache(file_path)`: Clear cache for specific file
- `compact_cache()`: Remove empty directories
#### Command Palette Integration
Access via Ctrl+P:
- Cache: View Statistics
- Cache: Clear All
- Cache: Clear Extractors / TMDB / Posters
- Cache: Clear Expired / Compact
#### Thread Safety
- All operations protected by `threading.RLock`
- Safe for concurrent extractor access
- Memory cache synchronized with file cache
### 8. UI Screens (`renamer/screens.py`)
1. **OpenScreen**: Directory selection dialog with validation
2. **HelpScreen**: Comprehensive help with key bindings
3. **RenameConfirmScreen**: File rename confirmation with error handling
4. **SettingsScreen**: Settings configuration interface
### 9. Settings System (`renamer/settings.py`)
**Configuration**: `~/.config/renamer/config.json`
**Options** (the `//` comments below are illustrative only — strict JSON does not allow comments, so the actual `config.json` must omit them):
```json
{
"mode": "technical", // or "catalog"
"cache_ttl_extractors": 21600, // 6 hours
"cache_ttl_tmdb": 21600, // 6 hours
"cache_ttl_posters": 2592000 // 30 days
}
```
Automatic save/load with defaults.
---
## Development Setup
### Installation
```bash
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
# Clone and sync
cd /home/sha/bin/renamer
uv sync
# Install dev dependencies
uv sync --extra dev
# Run from source
uv run python renamer/main.py [directory]
```
### Development Commands
```bash
# Run installed version
uv run renamer [directory]
# Run tests
uv run pytest
# Run tests with coverage
uv run pytest --cov=renamer
# Type checking
uv run mypy renamer/extractors/default_extractor.py
# Version management
uv run bump-version # Increment patch version
uv run release # Bump + sync + build
# Build distribution
uv build # Create wheel and tarball
# Install as global tool
uv tool install .
```
### Debugging
```bash
# Enable formatter logging
FORMATTER_LOG=1 uv run renamer /path/to/directory
# Creates formatter.log with detailed call traces
```
---
## Testing Strategy
### Test Organization
```
renamer/test/
├── datasets/ # Test data
│ ├── filenames/
│ │ ├── filename_patterns.json # 46 test cases
│ │ └── sample_files/ # Legacy reference
│ ├── mediainfo/
│ │ └── frame_class_tests.json # 25 test cases
│ └── sample_mediafiles/ # Generated (in .gitignore)
├── conftest.py # Fixtures and dataset loaders
├── test_cache_subsystem.py # 18 cache tests
├── test_services.py # 30+ service tests
├── test_utils.py # 70+ utility tests
├── test_formatters.py # 40+ formatter tests
├── test_filename_detection.py # Comprehensive filename parsing
├── test_filename_extractor.py # 368 extractor tests
├── test_mediainfo_*.py # MediaInfo tests
├── test_fileinfo_extractor.py # File info tests
└── test_metadata_extractor.py # Metadata tests
```
### Test Statistics
- **Total Tests**: 560 (1 skipped)
- **Service Layer**: 30+ tests
- **Utilities**: 70+ tests
- **Formatters**: 40+ tests
- **Extractors**: 400+ tests
- **Cache**: 18 tests
### Sample File Generation
```bash
# Generate 46 test files from filename_patterns.json
uv run python renamer/test/fill_sample_mediafiles.py
```
### Test Fixtures
```python
# Load test datasets
patterns = load_filename_patterns()
frame_tests = load_frame_class_tests()
dataset = load_dataset("custom_name")
file_path = get_test_file_path("movie.mkv")
```
### Running Tests
```bash
# All tests
uv run pytest
# Specific test file
uv run pytest renamer/test/test_services.py
# With verbose output
uv run pytest -xvs
# With coverage
uv run pytest --cov=renamer --cov-report=html
```
---
## Code Standards
### Python Standards
- **Version**: Python 3.11+
- **Style**: PEP 8 guidelines
- **Type Hints**: Encouraged for all public APIs
- **Docstrings**: Google-style format
- **Pathlib**: For all file operations
- **Exception Handling**: Specific exceptions (no bare `except:`)
### Docstring Format
```python
def example_function(param1: int, param2: str) -> bool:
"""Brief description of function.
Longer description if needed, explaining behavior,
edge cases, or important details.
Args:
param1: Description of param1
param2: Description of param2
Returns:
Description of return value
Raises:
ValueError: When param1 is negative
Example:
>>> example_function(5, "test")
True
"""
pass
```
### Type Hints
```python
from typing import Optional
# Function type hints
def extract_title(self) -> Optional[str]:
...
# Union types (Python 3.10+)
def extract_movie_db(self) -> list[str] | None:
...
# Generic types
def extract_tracks(self) -> list[dict]:
...
```
### Logging Strategy
**Levels**:
- **Debug**: Language code conversions, metadata reads, MIME detection
- **Warning**: Network failures, API errors, MediaInfo parse failures
- **Error**: Formatter application failures
**Usage**:
```python
import logging
logger = logging.getLogger(__name__)
logger.debug(f"Converted {lang_code} to {iso3_code}")
logger.warning(f"TMDB API request failed: {e}")
logger.error(f"Error applying {formatter.__name__}: {e}")
```
### Error Handling
**Guidelines**:
- Catch specific exceptions: `(LookupError, ValueError, AttributeError)`
- Log all caught exceptions with context
- Network errors: `(requests.RequestException, ValueError)`
- Always close file handles (use context managers)
**Example**:
```python
try:
lang_obj = langcodes.Language.get(lang_code.lower())
return lang_obj.to_alpha3()
except (LookupError, ValueError, AttributeError) as e:
logger.debug(f"Invalid language code '{lang_code}': {e}")
return None
```
### Architecture Patterns
1. **Extractor Pattern**: Each extractor focuses on one data source
2. **Formatter Pattern**: Formatters handle display logic, extractors handle data
3. **Separation of Concerns**: Data extraction → formatting → display
4. **Dependency Injection**: Extractors and formatters are modular
5. **Configuration Management**: Settings class for all config
### Best Practices
- **Simplicity**: Avoid over-engineering, keep solutions simple
- **Minimal Changes**: Only modify what's explicitly requested
- **Validation**: Only at system boundaries (user input, external APIs)
- **Trust Internal Code**: Don't add unnecessary error handling
- **Delete Unused Code**: No backwards-compatibility hacks
- **No Premature Abstraction**: Three similar lines > premature abstraction
---
## AI Assistant Instructions
### Core Principles
1. **Read Before Modify**: Always read files before suggesting modifications
2. **Follow Existing Patterns**: Understand established architecture before changes
3. **Test Everything**: Run `uv run pytest` after all changes
4. **Simplicity First**: Avoid over-engineering solutions
5. **Document Changes**: Update relevant documentation
### When Adding Features
1. Read existing code and understand architecture
2. Check `REFACTORING_PROGRESS.md` for pending tasks
3. Implement features incrementally
4. Test with real media files
5. Ensure backward compatibility
6. Update documentation
7. Update tests as needed
8. Run `uv run release` before committing
### When Debugging
1. Enable formatter logging: `FORMATTER_LOG=1`
2. Check cache state (clear if stale data suspected)
3. Verify file permissions
4. Test with sample filenames first
5. Check logs in `formatter.log`
### When Refactoring
1. Maintain backward compatibility unless explicitly breaking
2. Update tests to reflect refactored code
3. Check all formatters (formatting is centralized)
4. Verify extractor chain (ensure data flow intact)
5. Run full test suite
### Common Pitfalls to Avoid
- ❌ Don't create new files unless absolutely necessary
- ❌ Don't add features beyond what's requested
- ❌ Don't skip testing with real files
- ❌ Don't forget to update version number for releases
- ❌ Don't commit secrets or API keys
- ❌ Don't use deprecated Textual APIs
- ❌ Don't use bare `except:` clauses
- ❌ Don't use command-line tools when specialized tools exist
### Tool Usage
- **Read files**: Use `Read` tool, not `cat`
- **Edit files**: Use `Edit` tool, not `sed`
- **Write files**: Use `Write` tool, not `echo >>`
- **Search files**: Use `Glob` tool, not `find`
- **Search content**: Use `Grep` tool, not `grep`
- **Run commands**: Use `Bash` tool for terminal operations only
### Git Workflow
**Commit Standards**:
- Clear, descriptive messages
- Focus on "why" not "what"
- One logical change per commit
**Commit Message Format**:
```
type: Brief description (imperative mood)
Longer explanation if needed.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
```
**Safety Protocol**:
- ❌ NEVER update git config
- ❌ NEVER run destructive commands without explicit request
- ❌ NEVER skip hooks (--no-verify, --no-gpg-sign)
- ❌ NEVER force push to main/master
- ❌ Avoid `git commit --amend` unless conditions met
### Creating Pull Requests
1. Run `git status`, `git diff`, `git log` to understand changes
2. Analyze ALL commits that will be included
3. Draft comprehensive PR summary
4. Create PR using:
```bash
gh pr create --title "Title" --body "$(cat <<'EOF'
## Summary
- Bullet points of changes
## Test plan
- Testing checklist
🤖 Generated with [Claude Code](https://claude.com/claude-code)
EOF
)"
```
---
## Release Process
### Version Management
**Version Scheme**: SemVer (MAJOR.MINOR.PATCH)
**Commands**:
```bash
# Bump patch version (0.6.0 → 0.6.1)
uv run bump-version
# Full release process
uv run release # Bump + sync + build
```
### Release Checklist
- [ ] All tests passing: `uv run pytest`
- [ ] Type checking passes: `uv run mypy renamer/`
- [ ] Documentation updated (CHANGELOG.md, README.md)
- [ ] Version bumped in `pyproject.toml`
- [ ] Dependencies synced: `uv sync`
- [ ] Build successful: `uv build`
- [ ] Install test: `uv tool install .`
- [ ] Manual testing with real media files
### Build Artifacts
```
dist/
├── renamer-0.7.0-py3-none-any.whl # Wheel distribution
└── renamer-0.7.0.tar.gz # Source distribution
```
---
## API Integration
### TMDB API
**Configuration**:
- API key stored in `renamer/secrets.py`
- Base URL: `https://api.themoviedb.org/3/`
- Image base URL for poster downloads
**Endpoints Used**:
- Search: `/search/movie`
- Movie details: `/movie/{id}`
**Rate Limiting**: Handled gracefully with error fallback
**Caching**:
- API responses cached for 6 hours
- Posters cached for 30 days
- Cache location: `~/.cache/renamer/tmdb/`, `~/.cache/renamer/posters/`
---
## File Operations
### Directory Scanning
- Recursive search for supported video formats
- File tree representation with hierarchical structure
- Efficient tree updates on file operations
- Permission error handling
### File Renaming
**Process**:
1. Select file in tree
2. Press `r` to initiate rename
3. Review proposed name (current vs proposed)
4. Confirm with `y` or cancel with `n`
5. Tree updates in-place without full reload
**Proposed Name Format**:
```
Title (Year) [Resolution Source Edition].ext
```
**Sanitization**:
- Invalid characters removed (cross-platform)
- Reserved names checked (Windows compatibility)
- Markup tags stripped
- Length validation
### Metadata Caching
- First extraction cached for 6 hours
- TMDB data cached for 6 hours
- Posters cached for 30 days
- Force refresh with `f` command
- Cache invalidated on file rename
---
## Keyboard Commands
| Key | Action |
|-----|--------|
| `q` | Quit application |
| `o` | Open directory |
| `s` | Scan/rescan directory |
| `f` | Refresh metadata for selected file |
| `r` | Rename file with proposed name |
| `p` | Toggle tree expansion |
| `m` | Toggle mode (technical/catalog) |
| `h` | Show help screen |
| `Ctrl+S` | Open settings |
| `Ctrl+P` | Open command palette |
---
## Known Issues & Limitations
### Current Limitations
- TMDB API requires internet connection
- Poster display requires terminal with image support
- Some special characters in filenames need sanitization
- Large directories may have initial scan delay
### Performance Notes
- In-memory cache reduces repeated extraction overhead
- File cache persists across sessions
- Tree updates optimized for rename operations
- TMDB requests throttled to respect API limits
- Large directory scans use async/await patterns
---
## Security Considerations
- Input sanitization for filenames (see `ProposedNameFormatter`)
- No shell command injection risks
- Safe file operations (pathlib, proper error handling)
- TMDB API key lives in `renamer/secrets.py`; keep that file out of version control (e.g. list it in `.gitignore`) so the key is never committed
- Cache directory permissions should be user-only
---
## Project History
### Evolution
- Started as simple file renamer
- Added metadata extraction (MediaInfo, Mutagen)
- Expanded to TUI with Textual framework
- Added filename parsing intelligence
- Integrated TMDB for catalog mode
- Added settings and caching system
- Implemented poster display with rich-pixels
- Added dual-mode interface (technical/catalog)
- Phase 1-3 refactoring (2025-12-31 to 2026-01-01)
### Version Milestones
- **0.2.x**: Initial TUI with basic metadata
- **0.3.x**: Enhanced extractors and formatters
- **0.4.x**: Added TMDB integration
- **0.5.x**: Settings, caching, catalog mode, poster display
- **0.6.0**: Cache subsystem, service layer, protocols
- **0.7.0-dev**: Complete refactoring (in progress)
---
## Resources
### External Documentation
- [Textual Documentation](https://textual.textualize.io/)
- [PyMediaInfo Documentation](https://pymediainfo.readthedocs.io/)
- [Mutagen Documentation](https://mutagen.readthedocs.io/)
- [TMDB API Documentation](https://developers.themoviedb.org/3)
- [UV Documentation](https://docs.astral.sh/uv/)
- [Python Type Hints](https://docs.python.org/3/library/typing.html)
- [Mypy Documentation](https://mypy.readthedocs.io/)
### Internal Documentation
- **README.md**: User guide and quick start
- **INSTALL.md**: Installation methods
- **DEVELOP.md**: Developer setup and debugging
- **CHANGELOG.md**: Version history and changes
- **REFACTORING_PROGRESS.md**: Future refactoring plans
- **ToDo.md**: Current task list
---
**Last Updated**: 2026-01-01
**Maintainer**: sha
**For**: AI Assistants and Developers
**Repository**: `/home/sha/bin/renamer`

232
README.md
View File

@@ -1,118 +1,182 @@
# Renamer - Media File Renamer and Metadata Viewer # Renamer - Media File Renamer and Metadata Viewer
A powerful terminal-based (TUI) application for managing media collections. Scan directories, view detailed metadata, browse TMDB catalog information with posters, and intelligently rename files. Built with Python and Textual. **Version**: 0.7.0-dev
**Version**: 0.5.10 A powerful Terminal User Interface (TUI) for managing media collections. View detailed metadata, browse TMDB catalog with posters, and intelligently rename files.
> **📘 For complete documentation, see [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)**
---
## Features ## Features
### Core Capabilities - **Dual Display Modes**: Technical (codecs/tracks) or Catalog (TMDB with posters)
- **Dual Display Modes**: Switch between Technical (codec/track details) and Catalog (TMDB metadata with posters) - **Multi-Source Metadata**: MediaInfo, filename parsing, embedded tags, TMDB API
- **Recursive Directory Scanning**: Finds all video files in nested directories - **Intelligent Renaming**: Standardized names from metadata
- **Tree View Navigation**: Keyboard and mouse support with expand/collapse - **Advanced Caching**: 6h extractors, 6h TMDB, 30d posters
- **Multi-Source Metadata**: Combines MediaInfo, filename parsing, embedded tags, and TMDB API - **Terminal Posters**: View movie posters in your terminal
- **Intelligent Renaming**: Proposes standardized names based on extracted metadata - **Tree View Navigation**: Keyboard and mouse support
- **Persistent Settings**: Configurable mode and cache TTLs saved to `~/.config/renamer/`
- **Advanced Caching**: File-based cache with TTL (6h extractors, 6h TMDB, 30d posters)
- **Terminal Poster Display**: View movie posters in your terminal using rich-pixels
- **Color-Coded Display**: Visual highlighting for different data types
- **Confirmation Dialogs**: Safe file operations with preview and confirmation
- **Extensible Architecture**: Modular extractor and formatter system for easy extension
## Installation ---
### Prerequisites ## Quick Start
- Python 3.11+
- UV package manager ### Installation
### Install UV (if not already installed)
```bash ```bash
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh curl -LsSf https://astral.sh/uv/install.sh | sh
```
### Install the Application # Install Renamer
```bash
# Clone or download the project
cd /path/to/renamer cd /path/to/renamer
# Install dependencies and build
uv sync uv sync
# Install as a global tool
uv tool install . uv tool install .
``` ```
## Usage See [INSTALL.md](INSTALL.md) for detailed installation instructions.
### Usage
### Running the App
```bash ```bash
# Scan current directory # Scan current directory
renamer renamer
# Scan specific directory # Scan specific directory
renamer /path/to/media/directory renamer /path/to/media
``` ```
### Keyboard Commands ---
- **q**: Quit the application
- **o**: Open directory selection dialog
- **s**: Scan/rescan current directory
- **f**: Force refresh metadata for selected file (bypass cache)
- **r**: Rename selected file with proposed name
- **p**: Toggle tree expansion (expand/collapse all)
- **h**: Show help screen
- **^p**: Open command palette (settings, mode toggle)
- **Settings**: Access via action bar (top-right corner)
### Navigation ## Keyboard Commands
- Use arrow keys to navigate the file tree
- Right arrow: Expand directory
- Left arrow: Collapse directory or go to parent
- Mouse clicks supported
- Select a video file to view its details in the right panel
### File Renaming | Key | Action |
1. Select a media file in the tree |-----|--------|
2. Press **r** to initiate rename | `q` | Quit |
3. Review the proposed new name in the confirmation dialog | `o` | Open directory |
4. Press **y** to confirm or **n** to cancel | `s` | Scan/rescan |
5. The file will be renamed and the tree updated automatically (cache invalidated) | `f` | Refresh metadata |
| `r` | Rename file |
| `m` | Toggle mode (technical/catalog) |
| `p` | Toggle tree expansion |
| `h` | Show help |
| `Ctrl+S` | Settings |
| `Ctrl+P` | Command palette |
### Display Modes ---
- **Technical Mode**: Shows codec details, bitrates, track information, resolutions
- **Catalog Mode**: Shows TMDB data including title, year, rating, overview, genres, and poster ## Display Modes
- Toggle between modes via Settings menu or command palette (^p)
### Technical Mode
- Video tracks (codec, bitrate, resolution, frame rate)
- Audio tracks (codec, channels, sample rate, language)
- Subtitle tracks (format, language)
- File information (size, modification time, path)
### Catalog Mode
- TMDB title, year, rating
- Overview/description
- Genres
- Poster image (if terminal supports)
- Technical metadata
Toggle with `m` key.
---
## File Renaming
**Proposed Format**: `Title (Year) [Resolution Source Edition].ext`
**Example**: `The Matrix (1999) [1080p BluRay].mkv`
1. Press `r` on selected file
2. Review proposed name
3. Confirm with `y` or cancel with `n`
---
## Configuration
**Location**: `~/.config/renamer/config.json`
```json
{
"mode": "technical",
"cache_ttl_extractors": 21600,
"cache_ttl_tmdb": 21600,
"cache_ttl_posters": 2592000
}
```
Access via `Ctrl+S` or edit file directly.
---
## Requirements
- **Python**: 3.11+
- **UV**: Package manager
- **MediaInfo**: System library (for technical metadata)
- **Internet**: For TMDB catalog mode
---
## Project Structure
```
renamer/
├── app.py # Main TUI application
├── services/ # Business logic
├── extractors/ # Metadata extraction
├── formatters/ # Display formatting
├── utils/ # Shared utilities
├── cache/ # Caching subsystem
└── constants/ # Configuration constants
```
See [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md) for complete architecture documentation.
---
## Development ## Development
For development setup, architecture details, debugging information, and contribution guidelines, see [DEVELOP.md](DEVELOP.md). ```bash
# Setup
uv sync --extra dev
## Supported Video Formats # Run tests
- .mkv uv run pytest
- .avi
- .mov
- .mp4
- .wmv
- .flv
- .webm
- .m4v
- .3gp
- .ogv
## Dependencies # Run from source
- **textual** ≥6.11.0: TUI framework uv run renamer [directory]
- **pymediainfo** ≥6.0.0: Detailed media track information ```
- **mutagen** ≥1.47.0: Embedded metadata extraction
- **python-magic** ≥0.4.27: MIME type detection
- **langcodes** ≥3.5.1: Language code handling
- **requests** ≥2.31.0: HTTP client for TMDB API
- **rich-pixels** ≥1.0.0: Terminal image display
- **pytest** ≥7.0.0: Testing framework
### System Requirements See [DEVELOP.md](DEVELOP.md) for development documentation.
- **Python**: 3.11 or higher
- **MediaInfo Library**: System dependency for pymediainfo ---
- Ubuntu/Debian: `sudo apt install libmediainfo-dev`
- Fedora/CentOS: `sudo dnf install libmediainfo-devel` ## Documentation
- Arch Linux: `sudo pacman -S libmediainfo`
- macOS/Windows: Automatically handled by pymediainfo - **[ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)** - Complete technical reference
- **[INSTALL.md](INSTALL.md)** - Installation instructions
- **[DEVELOP.md](DEVELOP.md)** - Development guide
- **[CHANGELOG.md](CHANGELOG.md)** - Version history
- **[CLAUDE.md](CLAUDE.md)** - AI assistant reference
---
## License
Not specified
---
## Credits
- Built with [Textual](https://textual.textualize.io/)
- Metadata from [MediaInfo](https://mediaarea.net/en/MediaInfo)
- Catalog data from [TMDB](https://www.themoviedb.org/)
---
**For complete documentation, see [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md)**

408
REFACTORING_PROGRESS.md Normal file
View File

@@ -0,0 +1,408 @@
# Renamer - Refactoring Roadmap
**Version**: 0.7.0-dev
**Last Updated**: 2026-01-01
> **📋 For completed work, see [CHANGELOG.md](CHANGELOG.md)**
This document tracks the future refactoring plan for Renamer v0.7.0+.
---
## Completed Phases
**Phase 1**: Critical Bug Fixes (5/5) - [See CHANGELOG.md](CHANGELOG.md)
**Phase 2**: Architecture Foundation (5/5) - [See CHANGELOG.md](CHANGELOG.md)
**Phase 3**: Code Quality (5/5) - [See CHANGELOG.md](CHANGELOG.md)
---
## Pending Phases
### Phase 3.6: Cleanup and Preparation (0/2)
**Goal**: Clean up remaining issues before major refactoring.
**Status**: NOT STARTED
**Priority**: HIGH (Must complete before Phase 4)
#### 3.6.1 Refactor ProposedNameFormatter to Use Decorator Pattern
**Status**: NOT STARTED
**Current Issue**: `ProposedNameFormatter` stores extracted values in `__init__` as instance variables, creating unnecessary coupling.
**Goal**: Convert to functional/decorator pattern similar to other formatters.
**Current Code**:
```python
class ProposedNameFormatter:
def __init__(self, extractor):
self.__order = extractor.get('order')
self.__title = extractor.get('title')
# ... more instance variables
def rename_line(self) -> str:
return f"{self.__order}{self.__title}..."
```
**Target Design**:
```python
class ProposedNameFormatter:
@staticmethod
def format_proposed_name(extractor) -> str:
"""Generate proposed filename from extractor data"""
# Direct formatting without storing state
order = format_order(extractor.get('order'))
title = format_title(extractor.get('title'))
return f"{order}{title}..."
@staticmethod
def format_proposed_name_with_color(file_path, extractor) -> str:
"""Format proposed name with color highlighting"""
proposed = ProposedNameFormatter.format_proposed_name(extractor)
# Color logic here
```
**Benefits**:
- Stateless, pure functions
- Easier to test
- Consistent with other formatters
- Can use `@cached()` decorator if needed
- No coupling to extractor instance
**Files to Modify**:
- `renamer/formatters/proposed_name_formatter.py`
- Update all usages in `app.py`, `screens.py`, etc.
---
#### 3.6.2 Clean Up Decorators Directory
**Status**: NOT STARTED
**Current Issue**: `renamer/decorators/` directory contains legacy `caching.py` file that's no longer used. All cache decorators were moved to `renamer/cache/decorators.py` in Phase 1.
**Current Structure**:
```
renamer/decorators/
├── caching.py # ⚠️ LEGACY - Remove
└── __init__.py # Import from renamer.cache
```
**Actions**:
1. **Verify no direct imports** of `renamer.decorators.caching`
2. **Remove `caching.py`** - All functionality now in `renamer/cache/decorators.py`
3. **Keep `__init__.py`** for backward compatibility (imports from `renamer.cache`)
4. **Update any direct imports** to use `from renamer.cache import cached_method`
**Verification**:
```bash
# Check for direct imports of old caching module
grep -r "from renamer.decorators.caching" renamer/
grep -r "import renamer.decorators.caching" renamer/
# Should only find imports from __init__.py that re-export from renamer.cache
```
**Benefits**:
- Removes dead code
- Clarifies that all caching is in `renamer/cache/`
- Maintains backward compatibility via `__init__.py`
---
### Phase 4: Refactor to New Architecture (0/4)
**Goal**: Migrate existing code to use the new architecture from Phase 2.
**Status**: NOT STARTED
#### 4.1 Refactor Extractors to Use Protocol
- Update all extractors to explicitly implement `DataExtractor` Protocol
- Ensure consistent method signatures
- Add missing Protocol methods where needed
- Update type hints to match Protocol
**Files to Update**:
- `filename_extractor.py`
- `mediainfo_extractor.py`
- `metadata_extractor.py`
- `fileinfo_extractor.py`
- `tmdb_extractor.py`
#### 4.2 Refactor Formatters to Use Base Classes
- Update all formatters to inherit from appropriate base classes
- Move to `DataFormatter`, `TextFormatter`, or `MarkupFormatter`
- Ensure consistent interface
- Add missing abstract methods
**Files to Update**:
- `media_formatter.py`
- `catalog_formatter.py`
- `track_formatter.py`
- `proposed_name_formatter.py`
- All specialized formatters
#### 4.3 Integrate RenamerApp with Services
- Refactor `app.py` to use service layer
- Replace direct extractor calls with `MetadataService`
- Replace direct file operations with `RenameService`
- Replace direct tree building with `FileTreeService`
- Remove business logic from UI layer
**Expected Benefits**:
- Cleaner separation of concerns
- Easier testing
- Better error handling
- More maintainable code
#### 4.4 Update Imports and Dependencies
- Update all imports to use new architecture
- Remove deprecated patterns
- Verify no circular dependencies
- Update tests to match new structure
---
### Phase 5: Test Coverage (4/6 — ~67% Complete)
**Goal**: Achieve comprehensive test coverage for all components.
**Status**: IN PROGRESS
#### ✅ 5.1 Service Layer Tests (COMPLETED)
- 30+ tests for FileTreeService, MetadataService, RenameService
- Integration tests for service workflows
#### ✅ 5.2 Utility Module Tests (COMPLETED)
- 70+ tests for PatternExtractor, LanguageCodeExtractor, FrameClassMatcher
- Integration tests for utility interactions
#### ✅ 5.3 Formatter Tests (COMPLETED)
- 40+ tests for all formatter classes
- FormatterApplier testing
#### ✅ 5.4 Dataset Organization (COMPLETED)
- Consolidated test data into `datasets/`
- 46 filename test cases
- 25 frame class test cases
- Sample file generator
#### ⏳ 5.5 Screen Tests (PENDING)
**Status**: NOT STARTED
**Scope**:
- Test OpenScreen functionality
- Test HelpScreen display
- Test RenameConfirmScreen workflow
- Test SettingsScreen interactions
- Mock user input
- Verify screen transitions
#### ⏳ 5.6 App Integration Tests (PENDING)
**Status**: NOT STARTED
**Scope**:
- End-to-end workflow testing
- Directory scanning → metadata display → rename
- Mode switching (technical/catalog)
- Cache integration
- Error handling flows
- Command palette integration
**Target Coverage**: >90%
---
### Phase 6: Documentation and Release (0/7)
**Goal**: Finalize documentation and prepare for release.
**Status**: NOT STARTED
#### 6.1 Update Technical Documentation
- ✅ ENGINEERING_GUIDE.md created
- [ ] API documentation generation
- [ ] Architecture diagrams
- [ ] Component interaction flows
#### 6.2 Update User Documentation
- ✅ README.md streamlined
- [ ] User guide with screenshots
- [ ] Common workflows documentation
- [ ] Troubleshooting guide
- [ ] FAQ section
#### 6.3 Update Developer Documentation
- ✅ DEVELOP.md streamlined
- [ ] Contributing guidelines
- [ ] Code review checklist
- [ ] PR template
- [ ] Issue templates
#### 6.4 Create CHANGELOG
- ✅ CHANGELOG.md created
- [ ] Detailed version history
- [ ] Migration guides for breaking changes
- [ ] Deprecation notices
#### 6.5 Version Bump to 0.7.0
- [ ] Update version in `pyproject.toml`
- [ ] Update version in all documentation
- [ ] Tag release in git
- [ ] Create GitHub release
#### 6.6 Build and Test Distribution
- [ ] Build wheel and tarball
- [ ] Test installation from distribution
- [ ] Verify all commands work
- [ ] Test on clean environment
- [ ] Cross-platform testing
#### 6.7 Prepare for PyPI Release (Optional)
- [ ] Create PyPI account
- [ ] Configure package metadata
- [ ] Test upload to TestPyPI
- [ ] Upload to PyPI
- [ ] Verify installation from PyPI
---
## Testing Status
### Current Metrics
- **Total Tests**: 560
- **Pass Rate**: 100% (559 passed, 1 skipped)
- **Coverage**: ~70% (estimated)
- **Target**: >90%
### Manual Testing Checklist
- [ ] Test with large directories (1000+ files)
- [ ] Test with various video formats
- [ ] Test TMDB integration with real API
- [ ] Test poster download and display
- [ ] Test cache expiration and cleanup
- [ ] Test concurrent file operations
- [ ] Test error recovery
- [ ] Test resource cleanup (no leaks)
- [ ] Performance regression testing
---
## Known Limitations
### Current Issues
- TMDB API requires internet connection
- Poster display requires image-capable terminal
- Some special characters need sanitization
- Large directories may have slow initial scan
### Planned Fixes
- Add offline mode with cached data
- Graceful degradation for terminal without image support
- Improve filename sanitization
- Optimize directory scanning with progress indication
---
## Breaking Changes to Consider
### Potential Breaking Changes in 0.7.0
- Cache key format (already changed in 0.6.0)
- Service layer API (internal, shouldn't affect users)
- Configuration file schema (may need migration)
### Migration Strategy
- Provide migration scripts where needed
- Document all breaking changes in CHANGELOG
- Maintain backward compatibility where possible
- Deprecation warnings before removal
---
## Performance Goals
### Current Performance
- ~2 seconds for 100 files (initial scan)
- ~50ms per file (metadata extraction with cache)
- ~200ms per file (TMDB lookup)
### Target Performance
- <1 second for 100 files
- <30ms per file (cached)
- <100ms per file (TMDB with cache)
- Background loading for large directories
---
## Architecture Improvements
### Already Implemented (Phase 2)
- ✅ Protocol-based extractors
- ✅ Service layer
- ✅ Utility modules
- ✅ Unified cache subsystem
- ✅ Thread pool for concurrent operations
### Future Improvements
- [ ] Plugin system for custom extractors/formatters
- [ ] Event-driven architecture for UI updates
- [ ] Dependency injection container
- [ ] Configuration validation schema
- [ ] API versioning
---
## Success Criteria
### Phase 4 Complete When:
- [ ] All extractors implement Protocol
- [ ] All formatters use base classes
- [ ] RenamerApp uses services exclusively
- [ ] No direct business logic in UI
- [ ] All tests passing
- [ ] No performance regression
### Phase 5 Complete When:
- [ ] >90% code coverage
- [ ] All screens tested
- [ ] Integration tests complete
- [ ] Manual testing checklist done
- [ ] Performance goals met
### Phase 6 Complete When:
- [ ] All documentation updated
- [ ] Version bumped to 0.7.0
- [ ] Distribution built and tested
- [ ] Release notes published
- [ ] Migration guide available
---
## Next Steps
1. **Start Phase 4**: Refactor to new architecture
- Begin with extractor Protocol implementation
- Update one extractor at a time
- Run tests after each change
- Document any issues encountered
2. **Complete Phase 5**: Finish test coverage
- Add screen tests
- Add integration tests
- Run coverage analysis
- Fix any gaps
3. **Execute Phase 6**: Documentation and release
- Update all docs
- Build distribution
- Test thoroughly
- Release v0.7.0
---
**See Also**:
- [CHANGELOG.md](CHANGELOG.md) - Completed work
- [ToDo.md](ToDo.md) - Future feature requests
- [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md) - Technical documentation
**Last Updated**: 2026-01-01

284
ToDo.md
View File

@@ -1,136 +1,176 @@
Project: Media File Renamer and Metadata Viewer (Python TUI with Textual) # Renamer - Future Tasks
**Current Version**: 0.5.10 **Version**: 0.7.0-dev
**Last Updated**: 2026-01-01
## TODO Steps: > **📋 For completed work, see [CHANGELOG.md](CHANGELOG.md)**
1. ✅ Set up Python project structure with UV package manager >
2. ✅ Install dependencies: textual, mutagen, pymediainfo, python-magic, pathlib for file handling > **📋 For refactoring plans, see [REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md)**
3. ✅ Implement recursive directory scanning for video files (*.mkv, *.avi, *.mov, *.mp4, *.wmv, *.flv, *.webm, etc.)
4. ✅ Detect real media container type using mutagen and python-magic This file tracks future feature enhancements and improvements.
5. ✅ Create Textual TUI application with split layout (left: file tree, right: file details)
6. ✅ Implement file tree display with navigation (keyboard arrows, mouse support)
7. ✅ Add bottom command bar with 'quit', 'open directory', 'scan' commands
8. ✅ Display file details on right side: file size, extension from filename, extension from metadata, file date
9. ✅ Add functionality to select files in the tree and update right panel
10. ✅ Implement detailed metadata display including video/audio/subtitle tracks with colors
11. ✅ Add custom tree styling with file icons and colored guides
12. ✅ Add scrollable details panel
13. ✅ Handle markup escaping for file names with brackets
14. ✅ Implement file renaming functionality with confirmation dialog
15. ✅ Add proposed name generation based on metadata extraction
16. ✅ Add help screen with key bindings and usage information
17. ✅ Add tree expansion/collapse toggle functionality
18. ✅ Add file refresh functionality to reload metadata for selected file
19. ✅ Optimize tree updates to avoid full reloads after renaming
20. ✅ Add loading indicators for metadata extraction
21. ✅ Add error handling for file operations and metadata extraction
22. 🔄 Implement blue highlighting for changed parts in proposed filename display (show differences between current and proposed names)
23. 🔄 Implement build script to exclude dev commands (bump-version, release) from distributed package
24. 📋 Implement metadata editing capabilities (future enhancement)
25. 📋 Add batch rename operations (future enhancement)
26. 📋 Add plugin system for custom extractors/formatters (future enhancement)
27. 📋 Add advanced search and filtering capabilities (future enhancement)
28. 📋 Implement undo/redo functionality for file operations (future enhancement)
--- ---
## Media Catalog Mode Implementation Plan ## Priority Tasks
**New big app evolution step: Add media catalog mode with settings, caching, and enhanced TMDB display.**
### Phase 1: Settings Management Foundation
1. ✅ Create settings module (`renamer/settings.py`) for JSON config in `~/.config/renamer/config.json` with schema: mode, cache TTLs
2. ✅ Integrate settings into app startup (load/save on launch/exit)
3. ✅ Add settings window to UI with fields for mode and TTLs
4. ✅ Add "Open Settings" command to command panel
5. ✅ Order the settings menu item in the action bar on the right side, close to the system menu item (^p palette)
### Phase 2: Mode Toggle and UI Switching
5. ✅ Add "Toggle Mode" command to switch between "technical" and "catalog" modes
6. ✅ Modify right pane for mode-aware display (technical vs catalog info)
7. ✅ Persist and restore mode state from settings
### Phase 3: Caching System
8. ✅ Create caching module (`renamer/cache.py`) for file-based cache with TTL support
9. ✅ Integrate caching into extractors (check cache first, store results)
10. ✅ Add refresh command to force re-extraction and cache update
11. ✅ Handle cache cleanup on file rename (invalidate old filename)
### Phase 4: Media Catalog Display
12. ✅ Update TMDB extractor for catalog data: title, year, duration, rates, overview, genre codes, poster_path
13. ✅ Create catalog formatter (`formatters/catalog_formatter.py`) for beautiful display
14. ✅ Integrate catalog display into right pane
### Phase 5: Poster Handling and Display
15. ✅ Add poster caching (images in cache dir with 1-month TTL)
16. ✅ Implement terminal image display (using rich-pixels library)
### Phase 6: Polish and Documentation
17. ✅ Create comprehensive CLAUDE.md for AI assistants
18. ✅ Update all markdown documentation files
19. ✅ Ensure version consistency across all files
### Additional TODOs from Plan
- 📋 Retrieve full movie details from TMDB (currently basic data only)
- 📋 Expand genres to full names instead of codes (currently shows genre IDs)
- 📋 Optimize poster quality and display (improve image rendering)
- 📋 Add TV show support (currently movie-focused)
- 📋 Implement blue highlighting for filename differences
- 📋 Build script to exclude dev commands from distribution
---
## Recently Completed (v0.5.x)
### Version 0.5.10
- Complete media catalog mode implementation
- TMDB integration with poster display
- Settings system with persistent JSON storage
- Advanced caching with TTL support
- Dual-mode display (technical/catalog)
- Settings UI screen
### Version 0.4.x
- Enhanced extractor system
- TMDB extractor foundation
- Improved formatter architecture
### Version 0.3.x
- Expanded metadata extraction
- Multiple formatter types
- Special edition detection
### Version 0.2.x
- Initial TUI implementation
- Basic metadata extraction
- File tree navigation
- Rename functionality
---
## Development Priorities
### High Priority ### High Priority
1. 🔄 Blue highlighting for filename differences (UX improvement)
2. 🔄 Build script for clean distribution packages - [ ] **Phase 4: Refactor to New Architecture**
3. 📋 Genre ID to name expansion (TMDB lookup) - Refactor existing extractors to use Protocol
- Refactor existing formatters to use base classes
- Integrate RenamerApp with services
- Update all imports and dependencies
- See [REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md) for details
- [ ] **Complete Test Coverage**
- Add UI screen tests
- Add app integration tests
- Increase coverage to >90%
### Medium Priority ### Medium Priority
1. 📋 Batch rename operations
2. 📋 Advanced search/filtering
3. 📋 TV show support
### Low Priority (Future) - [ ] **MKV Metadata Editor with mkvpropedit**
1. 📋 Metadata editing - Fast metadata editing without re-encoding (using mkvpropedit)
2. 📋 Plugin system - Edit container title from TMDB data
3. 📋 Undo/redo functionality - Set audio/subtitle track languages from filename
4. 📋 Configuration profiles - Set track names and flags
- Batch editing support with preview
- Validation before applying changes
- [ ] **Batch Rename Operations**
- Select multiple files
- Preview all changes
- Bulk rename with rollback
- [ ] **Advanced Search and Filtering**
- Filter by resolution, codec, year
- Search by TMDB metadata
- Save filter presets
--- ---
**Legend:** ## Feature Enhancements
- ✅ Completed
- 🔄 In Progress / Partially Complete
- 📋 Planned / Future Enhancement
**Last Updated**: 2025-12-31 ### UI Improvements
- [ ] **Blue Highlighting for Filename Differences**
- Show changed parts in proposed filename
- Color-code additions, removals, changes
- Side-by-side comparison view
- [ ] **Enhanced Poster Display**
- Optimize image quality
- Support for fanart/backdrops
- Poster cache management UI
- [ ] **Dedicated Poster Window with Real Image Support**
- Create separate panel/window for poster display in catalog mode
- Display actual poster images (not ASCII art) using terminal graphics protocols
- Support for Kitty graphics protocol, iTerm2 inline images, or Sixel
- Configurable poster size with smaller font rendering
- Side-by-side layout: metadata (60%) + poster (40%)
- Higher resolution ASCII art as fallback (100+ chars with extended gradient)
- [ ] **Progress Indicators**
- Show scan progress
- Batch operation progress bars
- Background task status
### TMDB Integration
- [ ] **Full Movie Details**
- Cast and crew information
- Production companies
- Budget and revenue data
- Release dates by region
- [ ] **Genre Name Expansion**
- Show full genre names instead of IDs
- Genre-based filtering
- Multi-genre support
- [ ] **TV Show Support**
- Episode and season metadata
- TV show renaming patterns
- Episode numbering detection
- [ ] **Collection/Series Support**
- Detect movie collections
- Group related media
- Collection-based renaming
### Technical Improvements
- [ ] **Undo/Redo Functionality**
- Track file operations history
- Undo renames
- Redo operations
- Operation log
- [ ] **Performance Optimization**
- Lazy loading for large directories
- Virtual scrolling in tree view
- Background metadata extraction
- Smart cache invalidation
### Build and Distribution
- [ ] **Build Script Improvements**
- Exclude dev commands from distribution
- Automated release workflow
- Cross-platform testing
- [ ] **Package Distribution**
- PyPI publication
- Homebrew formula
- AUR package
- Docker image
---
## Potential Future Features
### Advanced Features
- [ ] Subtitle downloading and management
- [ ] NFO file generation
- [ ] Integration with media servers (Plex, Jellyfin, Emby)
- [ ] Watch history tracking
- [ ] Duplicate detection
- [ ] Quality comparison (upgrade detection)
### Integrations
- [ ] Multiple database support (TVDB, Trakt, AniDB)
- [ ] Custom API integrations
- [ ] Local database option (offline mode)
- [ ] Webhook support for automation
### Export/Import
- [ ] Export catalog to CSV/JSON
- [ ] Import rename mappings
- [ ] Backup/restore settings
- [ ] Configuration profiles
---
## Known Issues
See [REFACTORING_PROGRESS.md](REFACTORING_PROGRESS.md) for current limitations and planned fixes.
---
## Contributing
Before working on any task:
1. Check [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md) for architecture details
2. Review [CHANGELOG.md](CHANGELOG.md) for recent changes
3. Read [DEVELOP.md](DEVELOP.md) for development setup
4. Run tests: `uv run pytest`
5. Follow code standards in [ENGINEERING_GUIDE.md](ENGINEERING_GUIDE.md#code-standards)
---
**Last Updated**: 2026-01-01

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
dist/renamer-0.6.10-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.6.11-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.6.12-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.6.9-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.1-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.10-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.2-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.3-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.4-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.5-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.6-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.7-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.8-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.7.9-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.1-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.10-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.11-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.2-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.3-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.4-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.5-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.6-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.7-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.8-py3-none-any.whl vendored Normal file

Binary file not shown.

BIN
dist/renamer-0.8.9-py3-none-any.whl vendored Normal file

Binary file not shown.

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "renamer" name = "renamer"
version = "0.6.0" version = "0.8.11"
description = "Terminal-based media file renamer and metadata viewer" description = "Terminal-based media file renamer and metadata viewer"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
@@ -15,6 +15,11 @@ dependencies = [
"rich-pixels>=1.0.0", "rich-pixels>=1.0.0",
] ]
[project.optional-dependencies]
dev = [
"mypy>=1.0.0",
]
[project.scripts] [project.scripts]
renamer = "renamer.main:main" renamer = "renamer.main:main"
bump-version = "renamer.bump:main" bump-version = "renamer.bump:main"

View File

@@ -2,6 +2,6 @@
from .app import RenamerApp from .app import RenamerApp
from .extractors.extractor import MediaExtractor from .extractors.extractor import MediaExtractor
from .formatters.media_formatter import MediaFormatter from .views import MediaPanelView, ProposedFilenameView
__all__ = ['RenamerApp', 'MediaExtractor', 'MediaFormatter'] __all__ = ['RenamerApp', 'MediaExtractor', 'MediaPanelView', 'ProposedFilenameView']

View File

@@ -2,66 +2,155 @@ from textual.app import App, ComposeResult
from textual.widgets import Tree, Static, Footer, LoadingIndicator from textual.widgets import Tree, Static, Footer, LoadingIndicator
from textual.containers import Horizontal, Container, ScrollableContainer, Vertical from textual.containers import Horizontal, Container, ScrollableContainer, Vertical
from textual.widget import Widget from textual.widget import Widget
from textual.command import Provider, Hit
from rich.markup import escape from rich.markup import escape
from pathlib import Path from pathlib import Path
from functools import partial
import threading import threading
import time import time
import logging import logging
import os
from .logging_config import LoggerConfig # Initialize logging singleton
from .constants import MEDIA_TYPES from .constants import MEDIA_TYPES
from .screens import OpenScreen, HelpScreen, RenameConfirmScreen, SettingsScreen from .screens import OpenScreen, HelpScreen, RenameConfirmScreen, SettingsScreen, ConvertConfirmScreen
from .extractors.extractor import MediaExtractor from .extractors.extractor import MediaExtractor
from .formatters.media_formatter import MediaFormatter from .views import MediaPanelView, ProposedFilenameView
from .formatters.proposed_name_formatter import ProposedNameFormatter
from .formatters.text_formatter import TextFormatter from .formatters.text_formatter import TextFormatter
from .formatters.catalog_formatter import CatalogFormatter from .formatters.catalog_formatter import CatalogFormatter
from .settings import Settings from .settings import Settings
from .cache import Cache, CacheManager
from .services.conversion_service import ConversionService
# Set up logging conditionally class CacheCommandProvider(Provider):
if os.getenv('FORMATTER_LOG', '0') == '1': """Command provider for cache management operations."""
logging.basicConfig(filename='formatter.log', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s') async def search(self, query: str):
else: """Search for cache commands matching the query."""
logging.basicConfig(level=logging.INFO) # Enable logging for debugging matcher = self.matcher(query)
commands = [
("cache_stats", "Cache: View Statistics", "View cache statistics (size, entries, etc.)"),
("cache_clear_all", "Cache: Clear All", "Clear all cache entries"),
("cache_clear_extractors", "Cache: Clear Extractors", "Clear extractor cache only"),
("cache_clear_tmdb", "Cache: Clear TMDB", "Clear TMDB API cache only"),
("cache_clear_posters", "Cache: Clear Posters", "Clear poster image cache only"),
("cache_clear_expired", "Cache: Clear Expired", "Remove expired cache entries"),
("cache_compact", "Cache: Compact", "Remove empty cache directories"),
]
for command_name, display_name, help_text in commands:
if (score := matcher.match(display_name)) > 0:
yield Hit(
score,
matcher.highlight(display_name),
partial(self.app.action_cache_command, command_name),
help=help_text
)
class AppCommandProvider(Provider):
"""Command provider for main application operations."""
async def search(self, query: str):
"""Search for app commands matching the query."""
matcher = self.matcher(query)
commands = [
("open", "Open Directory", "Open a directory to browse media files (o)"),
("scan_local", "Scan Node", "Scan current node's directory only (s)"),
("scan", "Scan Tree", "Scan entire directory tree (Ctrl+S)"),
("refresh", "Refresh File", "Refresh metadata for selected file (f)"),
("rename", "Rename File", "Rename the selected file (r)"),
("convert", "Convert to MKV", "Convert AVI/MPG/MPEG/WebM/MP4 file to MKV container with metadata (c)"),
("delete", "Delete File", "Delete the selected file (d)"),
("toggle_mode", "Toggle Display Mode", "Switch between technical and catalog view (m)"),
("expand", "Toggle Tree Expansion", "Expand or collapse all tree nodes (t)"),
("settings", "Settings", "Open settings screen (p)"),
("help", "Help", "Show keyboard shortcuts and help (h)"),
]
for command_name, display_name, help_text in commands:
if (score := matcher.match(display_name)) > 0:
yield Hit(
score,
matcher.highlight(display_name),
partial(self.app.run_action, command_name),
help=help_text
)
class RenamerApp(App): class RenamerApp(App):
CSS = """ CSS = """
/* Default technical mode: 2 columns */
#left { #left {
width: 50%; width: 50%;
padding: 1; padding: 1;
} }
#right { #middle {
width: 50%; width: 50%;
padding: 1; padding: 1;
} }
#right {
display: none; /* Hidden in technical mode */
}
/* Catalog mode: 3 columns */
.catalog-mode #left {
width: 33%;
}
.catalog-mode #middle {
width: 34%;
}
.catalog-mode #right {
display: block;
width: 33%;
padding: 1;
}
#poster_container {
height: 100%;
overflow-y: auto;
}
#poster_display {
height: auto;
}
""" """
BINDINGS = [ BINDINGS = [
("q", "quit", "Quit"), ("q", "quit", "Quit"),
("o", "open", "Open directory"), ("o", "open", "Open directory"),
("s", "scan", "Scan"), ("s", "scan_local", "Scan Node"),
("ctrl+s", "scan", "Scan Tree"),
("f", "refresh", "Refresh"), ("f", "refresh", "Refresh"),
("r", "rename", "Rename"), ("r", "rename", "Rename"),
("p", "expand", "Toggle Tree"), ("c", "convert", "Convert to MKV"),
("d", "delete", "Delete"),
("t", "expand", "Toggle Tree"),
("m", "toggle_mode", "Toggle Mode"), ("m", "toggle_mode", "Toggle Mode"),
("h", "help", "Help"), ("h", "help", "Help"),
("ctrl+s", "settings", "Settings"), ("p", "settings", "Settings"),
] ]
# Command palette - extend built-in commands with cache and app commands
COMMANDS = App.COMMANDS | {CacheCommandProvider, AppCommandProvider}
def __init__(self, scan_dir): def __init__(self, scan_dir):
super().__init__() super().__init__()
self.scan_dir = Path(scan_dir) if scan_dir else None self.scan_dir = Path(scan_dir) if scan_dir else None
self.tree_expanded = False self.tree_expanded = False
self.settings = Settings() self.settings = Settings()
# Initialize cache system
self.cache = Cache()
self.cache_manager = CacheManager(self.cache)
def compose(self) -> ComposeResult: def compose(self) -> ComposeResult:
with Horizontal(): with Horizontal(id="main_container"):
with Container(id="left"): with Container(id="left"):
yield Tree("Files", id="file_tree") yield Tree("Files", id="file_tree")
with Container(id="right"): # Middle container (for catalog mode info)
with Container(id="middle"):
with Vertical(): with Vertical():
yield LoadingIndicator(id="loading") yield LoadingIndicator(id="loading")
with ScrollableContainer(id="details_container"): with ScrollableContainer(id="details_container"):
@@ -72,15 +161,30 @@ class RenamerApp(App):
"", id="details_catalog", markup=False "", id="details_catalog", markup=False
) )
yield Static("", id="proposed", markup=True) yield Static("", id="proposed", markup=True)
# Right container (for poster in catalog mode, hidden in technical mode)
with Container(id="right"):
with ScrollableContainer(id="poster_container"):
yield Static("", id="poster_display", markup=False)
yield Footer() yield Footer()
def on_mount(self): def on_mount(self):
loading = self.query_one("#loading", LoadingIndicator) loading = self.query_one("#loading", LoadingIndicator)
loading.display = False loading.display = False
# Apply initial layout based on mode setting
self._update_layout()
self.scan_files() self.scan_files()
def _update_layout(self):
"""Update layout based on current mode setting."""
mode = self.settings.get("mode")
main_container = self.query_one("#main_container")
if mode == "catalog":
main_container.add_class("catalog-mode")
else:
main_container.remove_class("catalog-mode")
def scan_files(self): def scan_files(self):
logging.info("scan_files called")
if not self.scan_dir or not self.scan_dir.exists() or not self.scan_dir.is_dir(): if not self.scan_dir or not self.scan_dir.exists() or not self.scan_dir.is_dir():
details = self.query_one("#details_technical", Static) details = self.query_one("#details_technical", Static)
details.update("Error: Directory does not exist or is not a directory") details.update("Error: Directory does not exist or is not a directory")
@@ -92,6 +196,33 @@ class RenamerApp(App):
self.tree_expanded = False # Sub-levels are collapsed self.tree_expanded = False # Sub-levels are collapsed
self.set_focus(tree) self.set_focus(tree)
def _get_file_icon(self, file_path: Path) -> str:
"""Get icon for file based on extension.
Args:
file_path: Path to the file
Returns:
Icon character for the file type
"""
ext = file_path.suffix.lower().lstrip('.')
# File type icons
icons = {
'mkv': '󰈫', # Video camera for MKV
'mk3d': '󰟽', # Clapper board for 3D
'mp4': '󰎁', # Video camera
'mov': '󰎁', # Video camera
'webm': '', # Video camera
'avi': '💿', # Film frames for AVI
'wmv': '📀', # Video camera
'm4v': '📹', # Video camera
'mpg': '📼', # Video camera
'mpeg': '📼', # Video camera
}
return icons.get(ext, '📄') # Default to document icon
def build_tree(self, path: Path, node): def build_tree(self, path: Path, node):
try: try:
for item in sorted(path.iterdir()): for item in sorted(path.iterdir()):
@@ -99,13 +230,17 @@ class RenamerApp(App):
if item.is_dir(): if item.is_dir():
if item.name.startswith(".") or item.name == "lost+found": if item.name.startswith(".") or item.name == "lost+found":
continue continue
subnode = node.add(escape(item.name), data=item) # Add folder icon before directory name
label = f"󰉋 {escape(item.name)}"
subnode = node.add(label, data=item)
self.build_tree(item, subnode) self.build_tree(item, subnode)
elif item.is_file() and item.suffix.lower() in { elif item.is_file() and item.suffix.lower() in {
f".{ext}" for ext in MEDIA_TYPES f".{ext}" for ext in MEDIA_TYPES
}: }:
logging.info(f"Adding file to tree: {item.name!r} (full path: {item})") # Add file type icon before filename
node.add(escape(item.name), data=item) icon = self._get_file_icon(item)
label = f"{icon} {escape(item.name)}"
node.add(label, data=item)
except PermissionError: except PermissionError:
pass pass
except PermissionError: except PermissionError:
@@ -130,61 +265,93 @@ class RenamerApp(App):
def on_tree_node_highlighted(self, event): def on_tree_node_highlighted(self, event):
node = event.node node = event.node
if node.data and isinstance(node.data, Path): if node.data and isinstance(node.data, Path):
if node.data.is_dir(): # Check if path still exists
if not node.data.exists():
self._stop_loading_animation() self._stop_loading_animation()
details = self.query_one("#details_technical", Static) details = self.query_one("#details_technical", Static)
details.display = True details.display = True
details_catalog = self.query_one("#details_catalog", Static) details_catalog = self.query_one("#details_catalog", Static)
details_catalog.display = False details_catalog.display = False
details.update("Directory") details.update(f"[red]Path no longer exists: {node.data.name}[/red]")
proposed = self.query_one("#proposed", Static)
proposed.update("")
return
try:
if node.data.is_dir():
self._stop_loading_animation()
details = self.query_one("#details_technical", Static)
details.display = True
details_catalog = self.query_one("#details_catalog", Static)
details_catalog.display = False
details.update("Directory")
proposed = self.query_one("#proposed", Static)
proposed.update("")
elif node.data.is_file():
self._start_loading_animation()
threading.Thread(
target=self._extract_and_show_details, args=(node.data,)
).start()
except (FileNotFoundError, OSError):
# Handle race condition where file was deleted between exists() check and is_file() call
self._stop_loading_animation()
details = self.query_one("#details_technical", Static)
details.display = True
details_catalog = self.query_one("#details_catalog", Static)
details_catalog.display = False
details.update(f"[red]Error accessing path: {node.data.name}[/red]")
proposed = self.query_one("#proposed", Static) proposed = self.query_one("#proposed", Static)
proposed.update("") proposed.update("")
elif node.data.is_file():
self._start_loading_animation()
threading.Thread(
target=self._extract_and_show_details, args=(node.data,)
).start()
def _extract_and_show_details(self, file_path: Path): def _extract_and_show_details(self, file_path: Path):
try: try:
# Initialize extractors and formatters # Initialize extractors and formatters
extractor = MediaExtractor(file_path) extractor = MediaExtractor(file_path)
mode = self.settings.get("mode") mode = self.settings.get("mode")
poster_content = ""
if mode == "technical": if mode == "technical":
formatter = MediaFormatter(extractor) formatter = MediaPanelView(extractor)
full_info = formatter.file_info_panel() full_info = formatter.file_info_panel()
else: # catalog else: # catalog
formatter = CatalogFormatter(extractor) formatter = CatalogFormatter(extractor, self.settings)
full_info = formatter.format_catalog_info() full_info, poster_content = formatter.format_catalog_info()
# Update UI # Update UI
self.call_later( self.call_later(
self._update_details, self._update_details,
full_info, full_info,
ProposedNameFormatter(extractor).rename_line_formatted(file_path), ProposedFilenameView(extractor).rename_line_formatted(file_path),
poster_content,
) )
except Exception as e: except Exception as e:
self.call_later( self.call_later(
self._update_details, self._update_details,
TextFormatter.red(f"Error extracting details: {str(e)}"), TextFormatter.red(f"Error extracting details: {str(e)}"),
"", "",
"",
) )
def _update_details(self, full_info: str, display_string: str): def _update_details(self, full_info: str, display_string: str, poster_content: str = ""):
self._stop_loading_animation() self._stop_loading_animation()
details_technical = self.query_one("#details_technical", Static) details_technical = self.query_one("#details_technical", Static)
details_catalog = self.query_one("#details_catalog", Static) details_catalog = self.query_one("#details_catalog", Static)
poster_display = self.query_one("#poster_display", Static)
mode = self.settings.get("mode") mode = self.settings.get("mode")
if mode == "technical": if mode == "technical":
details_technical.display = True details_technical.display = True
details_catalog.display = False details_catalog.display = False
details_technical.update(full_info) details_technical.update(full_info)
poster_display.update("") # Clear poster in technical mode
else: else:
details_technical.display = False details_technical.display = False
details_catalog.display = True details_catalog.display = True
details_catalog.update(full_info) details_catalog.update(full_info)
# Update poster panel
poster_display.update(poster_content)
proposed = self.query_one("#proposed", Static) proposed = self.query_one("#proposed", Static)
proposed.update(display_string) proposed.update(display_string)
@@ -198,14 +365,73 @@ class RenamerApp(App):
if self.scan_dir: if self.scan_dir:
self.scan_files() self.scan_files()
async def action_scan_local(self):
"""Scan only the current node's directory (refresh node)."""
tree = self.query_one("#file_tree", Tree)
node = tree.cursor_node
if not node or not node.data:
self.notify("Please select a node first", severity="warning", timeout=3)
return
# Get the directory to scan
path = node.data
# Check if the path still exists
if not path.exists():
self.notify(f"Path no longer exists: {path.name}", severity="error", timeout=3)
# Remove the node from the tree since the file/dir is gone
if node.parent:
node.remove()
return
try:
if path.is_file():
# If it's a file, scan its parent directory
path = path.parent
# Find the parent node in the tree
if node.parent:
node = node.parent
else:
self.notify("Cannot scan root level file", severity="warning", timeout=3)
return
except (FileNotFoundError, OSError) as e:
self.notify(f"Error accessing path: {e}", severity="error", timeout=3)
if node.parent:
node.remove()
return
# Clear the node and rescan
node.remove_children()
self.build_tree(path, node)
# Expand the node to show new content
node.expand()
self.notify(f"Rescanned: {path.name}", severity="information", timeout=2)
async def action_refresh(self): async def action_refresh(self):
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)
node = tree.cursor_node node = tree.cursor_node
if node and node.data and isinstance(node.data, Path) and node.data.is_file(): if node and node.data and isinstance(node.data, Path):
self._start_loading_animation() # Check if path still exists
threading.Thread( if not node.data.exists():
target=self._extract_and_show_details, args=(node.data,) self.notify(f"Path no longer exists: {node.data.name}", severity="error", timeout=3)
).start() return
try:
if node.data.is_file():
# Invalidate cache for this file before re-extracting
cache = Cache()
invalidated = cache.invalidate_file(node.data)
logging.info(f"Refresh: invalidated {invalidated} cache entries for {node.data.name}")
self._start_loading_animation()
threading.Thread(
target=self._extract_and_show_details, args=(node.data,)
).start()
except (FileNotFoundError, OSError) as e:
self.notify(f"Error accessing file: {e}", severity="error", timeout=3)
async def action_help(self): async def action_help(self):
self.push_screen(HelpScreen()) self.push_screen(HelpScreen())
@@ -213,10 +439,60 @@ class RenamerApp(App):
async def action_settings(self): async def action_settings(self):
self.push_screen(SettingsScreen()) self.push_screen(SettingsScreen())
async def action_cache_command(self, command: str):
"""Execute a cache management command.
Args:
command: The cache command to execute (e.g., 'cache_stats', 'cache_clear_all')
"""
try:
if command == "cache_stats":
stats = self.cache_manager.get_stats()
stats_text = f"""Cache Statistics:
Total Files: {stats['total_files']}
Total Size: {stats['total_size_mb']:.2f} MB
Memory Entries: {stats['memory_cache_entries']}
By Category:"""
for subdir, info in stats['subdirs'].items():
stats_text += f"\n {subdir}: {info['file_count']} files, {info['size_mb']:.2f} MB"
self.notify(stats_text, severity="information", timeout=10)
elif command == "cache_clear_all":
count = self.cache_manager.clear_all()
self.notify(f"Cleared all cache: {count} entries removed", severity="information", timeout=3)
elif command == "cache_clear_extractors":
count = self.cache_manager.clear_by_prefix("extractor_")
self.notify(f"Cleared extractor cache: {count} entries removed", severity="information", timeout=3)
elif command == "cache_clear_tmdb":
count = self.cache_manager.clear_by_prefix("tmdb_")
self.notify(f"Cleared TMDB cache: {count} entries removed", severity="information", timeout=3)
elif command == "cache_clear_posters":
count = self.cache_manager.clear_by_prefix("poster_")
self.notify(f"Cleared poster cache: {count} entries removed", severity="information", timeout=3)
elif command == "cache_clear_expired":
count = self.cache_manager.clear_expired()
self.notify(f"Cleared {count} expired entries", severity="information", timeout=3)
elif command == "cache_compact":
self.cache_manager.compact_cache()
self.notify("Cache compacted successfully", severity="information", timeout=3)
except Exception as e:
self.notify(f"Error executing cache command: {str(e)}", severity="error", timeout=5)
async def action_toggle_mode(self): async def action_toggle_mode(self):
current_mode = self.settings.get("mode") current_mode = self.settings.get("mode")
new_mode = "catalog" if current_mode == "technical" else "technical" new_mode = "catalog" if current_mode == "technical" else "technical"
self.settings.set("mode", new_mode) self.settings.set("mode", new_mode)
# Update layout to show/hide poster panel
self._update_layout()
self.notify(f"Switched to {new_mode} mode", severity="information", timeout=2) self.notify(f"Switched to {new_mode} mode", severity="information", timeout=2)
# Refresh current file display if any # Refresh current file display if any
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)
@@ -230,16 +506,105 @@ class RenamerApp(App):
async def action_rename(self): async def action_rename(self):
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)
node = tree.cursor_node node = tree.cursor_node
if node and node.data and isinstance(node.data, Path) and node.data.is_file(): if node and node.data and isinstance(node.data, Path):
# Get the proposed name from the extractor # Check if file exists
extractor = MediaExtractor(node.data) if not node.data.exists():
proposed_formatter = ProposedNameFormatter(extractor) self.notify(f"File no longer exists: {node.data.name}", severity="error", timeout=3)
new_name = str(proposed_formatter) return
logging.info(f"Proposed new name: {new_name!r} for file: {node.data}")
if new_name and new_name != node.data.name: try:
self.push_screen(RenameConfirmScreen(node.data, new_name)) if node.data.is_file():
else: # Get the proposed name from the extractor
self.notify("Proposed name is the same as current name; no rename needed.", severity="information", timeout=3) extractor = MediaExtractor(node.data)
proposed_formatter = ProposedFilenameView(extractor)
new_name = str(proposed_formatter)
logging.info(f"Proposed new name: {new_name!r} for file: {node.data}")
# Always open rename dialog, even if names are the same (user might want to manually edit)
if new_name:
self.push_screen(RenameConfirmScreen(node.data, new_name))
except (FileNotFoundError, OSError) as e:
self.notify(f"Error accessing file: {e}", severity="error", timeout=3)
async def action_convert(self):
    """Convert AVI/MPG/MPEG/WebM/MP4 file to MKV with metadata preservation."""
    file_tree = self.query_one("#file_tree", Tree)
    selected = file_tree.cursor_node
    # Guard: need a tree node that carries a filesystem path.
    if not (selected and selected.data and isinstance(selected.data, Path)):
        self.notify("Please select a file first", severity="warning", timeout=3)
        return
    # Guard: the path may have vanished since the tree was built.
    if not selected.data.exists():
        self.notify(f"File no longer exists: {selected.data.name}", severity="error", timeout=3)
        return
    try:
        if not selected.data.is_file():
            self.notify("Please select a file first", severity="warning", timeout=3)
            return
    except (FileNotFoundError, OSError) as err:
        self.notify(f"Error accessing file: {err}", severity="error", timeout=3)
        return
    source_path = selected.data
    service = ConversionService()
    # Reject containers the conversion service does not handle.
    if not service.can_convert(source_path):
        self.notify("Only AVI, MPG, MPEG, WebM, and MP4 files can be converted to MKV", severity="error", timeout=3)
        return
    # Metadata is needed to map audio languages into the MKV.
    try:
        extractor = MediaExtractor(source_path)
    except Exception as err:
        self.notify(f"Failed to read file metadata: {err}", severity="error", timeout=5)
        return
    audio_tracks = extractor.get('audio_tracks', 'MediaInfo') or []
    if not audio_tracks:
        self.notify("No audio tracks found in file", severity="error", timeout=3)
        return
    languages = service.map_audio_languages(extractor, len(audio_tracks))
    subtitles = service.find_subtitle_files(source_path)
    target_path = source_path.with_suffix('.mkv')
    # The conversion itself is triggered from the confirmation screen's
    # button handler; this action only gathers inputs and opens the dialog.
    self.push_screen(
        ConvertConfirmScreen(source_path, target_path, languages, subtitles, extractor)
    )
async def action_delete(self):
    """Delete a file with confirmation."""
    from .screens import DeleteConfirmScreen

    file_tree = self.query_one("#file_tree", Tree)
    selected = file_tree.cursor_node
    # Guard: need a tree node that carries a filesystem path.
    if not (selected and selected.data and isinstance(selected.data, Path)):
        self.notify("Please select a file first", severity="warning", timeout=3)
        return
    # Guard: the path may have vanished since the tree was built.
    if not selected.data.exists():
        self.notify(f"File no longer exists: {selected.data.name}", severity="error", timeout=3)
        return
    try:
        is_regular_file = selected.data.is_file()
    except (FileNotFoundError, OSError) as err:
        self.notify(f"Error accessing file: {err}", severity="error", timeout=3)
        return
    if not is_regular_file:
        self.notify("Please select a file first", severity="warning", timeout=3)
        return
    # Actual deletion happens in the confirmation screen once acknowledged.
    self.push_screen(DeleteConfirmScreen(selected.data))
async def action_expand(self): async def action_expand(self):
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)
@@ -264,10 +629,10 @@ class RenamerApp(App):
def update_renamed_file(self, old_path: Path, new_path: Path): def update_renamed_file(self, old_path: Path, new_path: Path):
"""Update the tree node for a renamed file.""" """Update the tree node for a renamed file."""
logging.info(f"update_renamed_file called with old_path={old_path}, new_path={new_path}") logging.info(f"update_renamed_file called with old_path={old_path}, new_path={new_path}")
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)
logging.info(f"Before update: cursor_node.data = {tree.cursor_node.data if tree.cursor_node else None}") logging.info(f"Before update: cursor_node.data = {tree.cursor_node.data if tree.cursor_node else None}")
# Update only the specific node # Update only the specific node
def find_node(node): def find_node(node):
if node.data == old_path: if node.data == old_path:
@@ -277,11 +642,13 @@ class RenamerApp(App):
if found: if found:
return found return found
return None return None
node = find_node(tree.root) node = find_node(tree.root)
if node: if node:
logging.info(f"Found node for {old_path}, updating to {new_path.name}") logging.info(f"Found node for {old_path}, updating to {new_path.name}")
node.label = escape(new_path.name) # Update label with icon
icon = self._get_file_icon(new_path)
node.label = f"{icon} {escape(new_path.name)}"
node.data = new_path node.data = new_path
logging.info(f"After update: node.data = {node.data}, node.label = {node.label}") logging.info(f"After update: node.data = {node.data}, node.label = {node.label}")
# Ensure cursor stays on the renamed file # Ensure cursor stays on the renamed file
@@ -289,9 +656,9 @@ class RenamerApp(App):
logging.info(f"Selected node: {tree.cursor_node.data if tree.cursor_node else None}") logging.info(f"Selected node: {tree.cursor_node.data if tree.cursor_node else None}")
else: else:
logging.info(f"No node found for {old_path}") logging.info(f"No node found for {old_path}")
logging.info(f"After update: cursor_node.data = {tree.cursor_node.data if tree.cursor_node else None}") logging.info(f"After update: cursor_node.data = {tree.cursor_node.data if tree.cursor_node else None}")
# Refresh the details if the node is currently selected # Refresh the details if the node is currently selected
if tree.cursor_node and tree.cursor_node.data == new_path: if tree.cursor_node and tree.cursor_node.data == new_path:
logging.info("Refreshing details for renamed file") logging.info("Refreshing details for renamed file")
@@ -302,6 +669,189 @@ class RenamerApp(App):
else: else:
logging.info("Not refreshing details, cursor not on renamed file") logging.info("Not refreshing details, cursor not on renamed file")
def add_file_to_tree(self, file_path: Path):
    """Add a new file to the tree in the correct position.

    Finds the node for the file's parent directory, inserts the new file
    in alphabetical order among its siblings, selects it, and kicks off a
    background metadata extraction. Falls back to a full rescan when the
    parent directory node cannot be located.

    Args:
        file_path: Path to the new file to add
    """
    logging.info(f"add_file_to_tree called with file_path={file_path}")
    tree = self.query_one("#file_tree", Tree)
    parent_dir = file_path.parent
    logging.info(f"Looking for parent directory node: {parent_dir}")
    logging.info(f"Scan directory: {self.scan_dir}")
    # Check if parent directory is the scan directory (root level)
    # If so, the parent node is the tree root itself
    parent_node = None
    if self.scan_dir and parent_dir.resolve() == self.scan_dir.resolve():
        logging.info("File is in root scan directory, using tree.root as parent")
        parent_node = tree.root
    else:
        # Find the parent directory node in the tree (depth-first walk;
        # depth is only used to indent the debug log output).
        def find_node(node, depth=0):
            if node.data and isinstance(node.data, Path):
                logging.info(f"{' ' * depth}Checking node: data={node.data}")
                # Resolve both paths to absolute for comparison
                if node.data.resolve() == parent_dir.resolve():
                    logging.info(f"{' ' * depth}Found match! node.data={node.data}")
                    return node
            for child in node.children:
                found = find_node(child, depth + 1)
                if found:
                    return found
            return None
        parent_node = find_node(tree.root)
    if parent_node:
        logging.info(f"Found parent node for {parent_dir}, adding file {file_path.name}")
        # Get icon for the file
        icon = self._get_file_icon(file_path)
        label = f"{icon} {escape(file_path.name)}"
        # Add the new file node in alphabetically sorted position
        # (assumes siblings are already name-sorted — TODO confirm scan order)
        new_node = None
        inserted = False
        for i, child in enumerate(parent_node.children):
            if child.data and isinstance(child.data, Path):
                # Compare filenames for sorting
                if child.data.name > file_path.name:
                    # Insert before this child
                    new_node = parent_node.add(label, data=file_path, before=i)
                    inserted = True
                    logging.info(f"Inserted file before {child.data.name}")
                    break
        # If not inserted, add at the end
        if not inserted:
            new_node = parent_node.add(label, data=file_path)
            logging.info(f"Added file at end of directory")
        # Select the new node and show its details
        if new_node:
            tree.select_node(new_node)
            logging.info(f"Selected new node: {new_node.data}")
            # Refresh the details panel for the new file
            # (extraction runs off the UI thread; the worker updates the panel)
            self._start_loading_animation()
            threading.Thread(
                target=self._extract_and_show_details, args=(file_path,)
            ).start()
    else:
        logging.warning(f"No parent node found for {parent_dir}")
        logging.warning(f"Rescanning entire tree instead")
        # If we can't find the parent node, rescan the tree and try to select the new file
        tree = self.query_one("#file_tree", Tree)
        current_selection = tree.cursor_node.data if tree.cursor_node else None
        self.scan_files()
        # Try to restore selection to the new file, or the old selection, or parent dir
        def find_and_select(node, target_path):
            if node.data and isinstance(node.data, Path):
                if node.data.resolve() == target_path.resolve():
                    tree.select_node(node)
                    return True
            for child in node.children:
                if find_and_select(child, target_path):
                    return True
            return False
        # Try to select the new file first
        if not find_and_select(tree.root, file_path):
            # If that fails, try to restore previous selection
            if current_selection:
                find_and_select(tree.root, current_selection)
        # Refresh details panel for selected node
        if tree.cursor_node and tree.cursor_node.data:
            self._start_loading_animation()
            threading.Thread(
                target=self._extract_and_show_details, args=(tree.cursor_node.data,)
            ).start()
def remove_file_from_tree(self, file_path: Path):
    """Remove a file from the tree.

    Locates the node by resolved path, removes it, then moves the cursor
    to the most sensible neighbor (next sibling, previous sibling, or the
    parent) and refreshes/clears the details panels accordingly.

    Args:
        file_path: Path to the file to remove
    """
    logging.info(f"remove_file_from_tree called with file_path={file_path}")
    tree = self.query_one("#file_tree", Tree)
    # Find the node to remove (depth-first search by resolved path)
    def find_node(node):
        if node.data and isinstance(node.data, Path):
            if node.data.resolve() == file_path.resolve():
                return node
        for child in node.children:
            found = find_node(child)
            if found:
                return found
        return None
    node_to_remove = find_node(tree.root)
    if node_to_remove:
        logging.info(f"Found node to remove: {node_to_remove.data}")
        # Find the parent node to select after deletion
        parent_node = node_to_remove.parent
        next_node = None
        # Try to select next sibling, or previous sibling, or parent
        if parent_node:
            siblings = list(parent_node.children)
            try:
                current_index = siblings.index(node_to_remove)
                # Try next sibling first
                if current_index + 1 < len(siblings):
                    next_node = siblings[current_index + 1]
                # Try previous sibling
                elif current_index > 0:
                    next_node = siblings[current_index - 1]
                # Fall back to parent
                else:
                    next_node = parent_node if parent_node != tree.root else None
            except ValueError:
                # Node not among its parent's children; leave next_node as None
                pass
        # Remove the node
        node_to_remove.remove()
        logging.info(f"Removed node from tree")
        # Select the next appropriate node
        if next_node:
            tree.select_node(next_node)
            logging.info(f"Selected next node: {next_node.data}")
            # Refresh details if it's a file
            if next_node.data and isinstance(next_node.data, Path) and next_node.data.is_file():
                self._start_loading_animation()
                threading.Thread(
                    target=self._extract_and_show_details, args=(next_node.data,)
                ).start()
            else:
                # Clear details panel
                details = self.query_one("#details_technical", Static)
                details.update("Select a file to view details")
                proposed = self.query_one("#proposed", Static)
                proposed.update("")
        else:
            # No node to select, clear details
            details = self.query_one("#details_technical", Static)
            details.update("No files in directory")
            proposed = self.query_one("#proposed", Static)
            proposed.update("")
    else:
        logging.warning(f"Node not found for {file_path}")
def on_key(self, event): def on_key(self, event):
if event.key == "right": if event.key == "right":
tree = self.query_one("#file_tree", Tree) tree = self.query_one("#file_tree", Tree)

View File

@@ -1,235 +0,0 @@
import json
import os
import time
import hashlib
import pickle
from pathlib import Path
from typing import Any, Optional
class Cache:
    """File-based cache with TTL support.

    Entries live in two layers: an in-process dict (``_memory_cache``) for
    fast repeated lookups and JSON/pickle files under ``~/.cache/renamer``
    organised into per-category subdirectories. Every entry carries an
    ``expires`` epoch timestamp; expired entries are dropped lazily on
    access. Not thread-safe.

    Fixes over the previous revision:
    - ``invalidate()`` now also purges the in-memory layer (previously a
      ``get()`` after ``invalidate()`` could return the stale cached value).
    - Removed the per-call INFO logging from ``_get_cache_file`` (it logged
      on every cache access, flooding the log).
    """

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize the cache directory and the in-memory layer.

        Args:
            cache_dir: Accepted for interface compatibility but ignored —
                the cache always lives in ``~/.cache/renamer`` so cache
                files are never created inside a scanned directory.
        """
        # Always use the default cache dir to avoid creating cache in scan dir
        cache_dir = Path.home() / ".cache" / "renamer"
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # key -> {'value': ..., 'expires': epoch_seconds}
        self._memory_cache = {}

    def _get_cache_file(self, key: str) -> Path:
        """Map a cache key to its on-disk file path.

        Keys of the form ``ClassName.method_name.param_hash`` map to
        ``<ClassName>/<method>.<hash>.pkl``; legacy prefixed keys
        (``tmdb_``, ``poster_``, ``extractor_``) and everything else fall
        back to ``<subdir>/<md5-of-subkey>.json``.

        Args:
            key: Cache key

        Returns:
            Path to the cache file (its subdirectory is created on demand)
        """
        # Parse key format: ClassName.method_name.param_hash
        if '.' in key:
            parts = key.split('.')
            if len(parts) >= 3:
                class_name = parts[0]
                method_name = parts[1]
                param_hash = parts[2]
                # Use class name as subdir, but if it contains a path
                # separator, use "general" to avoid creating nested dirs
                if '/' in class_name or '\\' in class_name:
                    subdir = "general"
                    subkey = key
                    file_ext = "json"
                else:
                    subdir = class_name
                    file_ext = "pkl"
                cache_subdir = self.cache_dir / subdir
                cache_subdir.mkdir(parents=True, exist_ok=True)
                if file_ext == "pkl":
                    # Use method_name.param_hash as filename
                    return cache_subdir / f"{method_name}.{param_hash}.pkl"
                # Hash the subkey for filename
                key_hash = hashlib.md5(subkey.encode('utf-8')).hexdigest()
                return cache_subdir / f"{key_hash}.json"
        # Fallback for old keys (tmdb_, poster_, etc.)
        if key.startswith("tmdb_"):
            subdir = "tmdb"
            subkey = key[5:]  # Remove "tmdb_" prefix
        elif key.startswith("poster_"):
            subdir = "posters"
            subkey = key[7:]  # Remove "poster_" prefix
        elif key.startswith("extractor_"):
            subdir = "extractors"
            subkey = key[10:]  # Remove "extractor_" prefix
        else:
            subdir = "general"
            subkey = key
        # Create subdir
        cache_subdir = self.cache_dir / subdir
        cache_subdir.mkdir(parents=True, exist_ok=True)
        # Hash the subkey so long/special names stay filesystem-safe
        key_hash = hashlib.md5(subkey.encode('utf-8')).hexdigest()
        return cache_subdir / f"{key_hash}.json"

    def get(self, key: str) -> Optional[Any]:
        """Get cached value, or None if absent or expired."""
        # Check memory cache first
        if key in self._memory_cache:
            data = self._memory_cache[key]
            if time.time() > data.get('expires', 0):
                del self._memory_cache[key]
                return None
            return data.get('value')
        cache_file = self._get_cache_file(key)
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, 'r') as f:
                data = json.load(f)
            if time.time() > data.get('expires', 0):
                # Expired, remove file
                cache_file.unlink(missing_ok=True)
                return None
            # Promote to memory cache for faster future access
            self._memory_cache[key] = data
            return data.get('value')
        except (json.JSONDecodeError, IOError):
            # Corrupted, remove
            cache_file.unlink(missing_ok=True)
            return None

    def set(self, key: str, value: Any, ttl_seconds: int) -> None:
        """Set cached value with TTL (value must be JSON-serializable)."""
        data = {
            'value': value,
            'expires': time.time() + ttl_seconds
        }
        # Store in memory cache
        self._memory_cache[key] = data
        cache_file = self._get_cache_file(key)
        try:
            with open(cache_file, 'w') as f:
                json.dump(data, f)
        except IOError:
            pass  # Silently fail; the memory layer still holds the value

    def invalidate(self, key: str) -> None:
        """Remove a cache entry from both the memory and file layers."""
        # Purge memory first, otherwise get() would keep serving the
        # stale in-memory value after the file is gone (previous bug).
        self._memory_cache.pop(key, None)
        cache_file = self._get_cache_file(key)
        cache_file.unlink(missing_ok=True)

    def get_image(self, key: str) -> Optional[Path]:
        """Get cached image path if not expired.

        Returns:
            Path to the cached image file, or None if missing/expired.
        """
        cache_file = self._get_cache_file(key)
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, 'r') as f:
                data = json.load(f)
            if time.time() > data.get('expires', 0):
                # Expired: remove both the metadata file and the image itself
                image_path = data.get('image_path')
                if image_path and Path(image_path).exists():
                    Path(image_path).unlink(missing_ok=True)
                cache_file.unlink(missing_ok=True)
                return None
            image_path = data.get('image_path')
            if image_path and Path(image_path).exists():
                return Path(image_path)
            return None
        except (json.JSONDecodeError, IOError):
            cache_file.unlink(missing_ok=True)
            return None

    def set_image(self, key: str, image_data: bytes, ttl_seconds: int) -> Optional[Path]:
        """Write image bytes to disk and cache their location with a TTL.

        Returns:
            Path to the stored image, or None on write failure.
        """
        # Determine subdir and subkey
        if key.startswith("poster_"):
            subdir = "posters"
            subkey = key[7:]
        else:
            subdir = "images"
            subkey = key
        # Create subdir
        image_dir = self.cache_dir / subdir
        image_dir.mkdir(parents=True, exist_ok=True)
        # Hash for filename
        key_hash = hashlib.md5(subkey.encode('utf-8')).hexdigest()
        image_path = image_dir / f"{key_hash}.jpg"
        try:
            with open(image_path, 'wb') as f:
                f.write(image_data)
            # Cache metadata (image location + expiry) alongside the image
            data = {
                'image_path': str(image_path),
                'expires': time.time() + ttl_seconds
            }
            cache_file = self._get_cache_file(key)
            with open(cache_file, 'w') as f:
                json.dump(data, f)
            return image_path
        except IOError:
            return None

    def get_object(self, key: str) -> Optional[Any]:
        """Get pickled object from cache if not expired.

        NOTE: pickle.load executes arbitrary code if cache files are
        tampered with; the cache directory is user-local by design.
        """
        # Check memory cache first
        if key in self._memory_cache:
            data = self._memory_cache[key]
            if time.time() > data.get('expires', 0):
                del self._memory_cache[key]
                return None
            return data.get('value')
        cache_file = self._get_cache_file(key)
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
            if time.time() > data.get('expires', 0):
                # Expired, remove file
                cache_file.unlink(missing_ok=True)
                return None
            # Promote to memory cache
            self._memory_cache[key] = data
            return data.get('value')
        except (pickle.PickleError, IOError):
            # Corrupted, remove
            cache_file.unlink(missing_ok=True)
            return None

    def set_object(self, key: str, obj: Any, ttl_seconds: int) -> None:
        """Pickle and cache object with TTL."""
        data = {
            'value': obj,
            'expires': time.time() + ttl_seconds
        }
        # Store in memory cache
        self._memory_cache[key] = data
        cache_file = self._get_cache_file(key)
        try:
            with open(cache_file, 'wb') as f:
                pickle.dump(data, f)
        except IOError:
            pass  # Silently fail; memory layer still holds the object

107
renamer/cache/__init__.py vendored Normal file
View File

@@ -0,0 +1,107 @@
"""Unified caching subsystem for Renamer.
This module provides a flexible caching system with:
- Multiple cache key generation strategies
- Decorators for easy method caching
- Cache management and statistics
- Thread-safe operations
- In-memory and file-based caching with TTL
Usage Examples:
# Using decorators
from renamer.cache import cached, cached_api
class MyExtractor:
def __init__(self, file_path, cache, settings):
self.file_path = file_path
self.cache = cache
self.settings = settings
@cached(ttl=3600)
def extract_data(self):
# Automatically cached using FilepathMethodStrategy
return expensive_operation()
@cached_api("tmdb", ttl=21600)
def fetch_movie_data(self, movie_id):
# Cached API response
return api_call(movie_id)
# Using cache manager
from renamer.cache import Cache, CacheManager
cache = Cache()
manager = CacheManager(cache)
# Get statistics
stats = manager.get_stats()
print(f"Total cache size: {stats['total_size_mb']} MB")
# Clear all cache
manager.clear_all()
# Clear specific prefix
manager.clear_by_prefix("tmdb_")
"""
from .core import Cache
from .managers import CacheManager
from .strategies import (
CacheKeyStrategy,
FilepathMethodStrategy,
APIRequestStrategy,
SimpleKeyStrategy,
CustomStrategy
)
from .decorators import (
cached,
cached_method,
cached_api,
cached_property
)
from .types import CacheEntry, CacheStats
__all__ = [
# Core cache
'Cache',
'CacheManager',
# Strategies
'CacheKeyStrategy',
'FilepathMethodStrategy',
'APIRequestStrategy',
'SimpleKeyStrategy',
'CustomStrategy',
# Decorators
'cached',
'cached_method',
'cached_api',
'cached_property',
# Types
'CacheEntry',
'CacheStats',
# Convenience functions
'create_cache',
]
def create_cache(cache_dir=None):
    """Build a Cache together with its CacheManager (convenience function).

    Args:
        cache_dir: Optional cache directory path

    Returns:
        tuple: (Cache instance, CacheManager instance)

    Example:
        cache, manager = create_cache()
        stats = manager.get_stats()
        print(f"Cache has {stats['total_files']} files")
    """
    cache_instance = Cache(cache_dir)
    return cache_instance, CacheManager(cache_instance)

448
renamer/cache/core.py vendored Normal file
View File

@@ -0,0 +1,448 @@
import json
import logging
import threading
import time
import hashlib
import pickle
from pathlib import Path
from typing import Any, Optional, Dict
# Configure logger
logger = logging.getLogger(__name__)
class Cache:
"""Thread-safe file-based cache with TTL support (Singleton)."""
_instance: Optional['Cache'] = None
_lock_init = threading.Lock()
def __new__(cls, cache_dir: Optional[Path] = None):
"""Create or return singleton instance."""
if cls._instance is None:
with cls._lock_init:
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self, cache_dir: Optional[Path] = None):
"""Initialize cache with optional custom directory (only once).
Args:
cache_dir: Optional cache directory path. Defaults to ~/.cache/renamer/
"""
# Only initialize once
if self._initialized:
return
# Always use the default cache dir to avoid creating cache in scan dir
if cache_dir is None:
cache_dir = Path.home() / ".cache" / "renamer"
self.cache_dir = cache_dir
self.cache_dir.mkdir(parents=True, exist_ok=True)
self._memory_cache: Dict[str, Dict[str, Any]] = {} # In-memory cache for faster access
self._lock = threading.RLock() # Reentrant lock for thread safety
self._initialized = True
def _sanitize_key_component(self, component: str) -> str:
"""Sanitize a key component to prevent filesystem escaping.
Args:
component: Key component to sanitize
Returns:
Sanitized component safe for filesystem use
"""
# Remove or replace dangerous characters
dangerous_chars = ['/', '\\', '..', '\0']
sanitized = component
for char in dangerous_chars:
sanitized = sanitized.replace(char, '_')
return sanitized
def _get_cache_file(self, key: str) -> Path:
"""Get cache file path with organized subdirectories.
Supports two key formats:
1. Prefixed keys: "tmdb_id123", "poster_xyz" -> subdirectories
2. Plain keys: "anykey" -> general subdirectory
Args:
key: Cache key
Returns:
Path to cache file
"""
# Determine subdirectory and subkey based on prefix
if key.startswith("tmdb_"):
subdir = "tmdb"
subkey = key[5:] # Remove "tmdb_" prefix
elif key.startswith("poster_"):
subdir = "posters"
subkey = key[7:] # Remove "poster_" prefix
elif key.startswith("extractor_"):
subdir = "extractors"
subkey = key[10:] # Remove "extractor_" prefix
else:
# Default to general subdirectory
subdir = "general"
subkey = key
# Sanitize subdirectory name
subdir = self._sanitize_key_component(subdir)
# Create subdirectory
cache_subdir = self.cache_dir / subdir
cache_subdir.mkdir(parents=True, exist_ok=True)
# Hash the subkey for filename (prevents filesystem issues with long/special names)
key_hash = hashlib.md5(subkey.encode('utf-8')).hexdigest()
# Use .json extension for all cache files (simplifies logic)
return cache_subdir / f"{key_hash}.json"
def get(self, key: str, default: Any = None) -> Any:
"""Get cached value if not expired (thread-safe).
Args:
key: Cache key
default: Value to return if key not found or expired
Returns:
Cached value or default if not found/expired
"""
with self._lock:
# Check memory cache first
if key in self._memory_cache:
data = self._memory_cache[key]
if time.time() <= data.get('expires', 0):
return data.get('value')
else:
# Expired, remove from memory
del self._memory_cache[key]
logger.debug(f"Memory cache expired for key: {key}")
# Check file cache
cache_file = self._get_cache_file(key)
if not cache_file.exists():
return default
try:
with open(cache_file, 'r') as f:
data = json.load(f)
if time.time() > data.get('expires', 0):
# Expired, remove file
cache_file.unlink(missing_ok=True)
logger.debug(f"File cache expired for key: {key}, removed {cache_file}")
return default
# Store in memory cache for faster future access
self._memory_cache[key] = data
return data.get('value')
except json.JSONDecodeError as e:
# Corrupted JSON, remove file
logger.warning(f"Corrupted cache file {cache_file}: {e}")
cache_file.unlink(missing_ok=True)
return default
except IOError as e:
# File read error
logger.error(f"Failed to read cache file {cache_file}: {e}")
return default
def set(self, key: str, value: Any, ttl_seconds: int) -> None:
"""Set cached value with TTL (thread-safe).
Args:
key: Cache key
value: Value to cache (must be JSON-serializable)
ttl_seconds: Time-to-live in seconds
"""
with self._lock:
data = {
'value': value,
'expires': time.time() + ttl_seconds
}
# Store in memory cache
self._memory_cache[key] = data
# Store in file cache
cache_file = self._get_cache_file(key)
try:
with open(cache_file, 'w') as f:
json.dump(data, f, indent=2)
logger.debug(f"Cached key: {key} to {cache_file} (TTL: {ttl_seconds}s)")
except (IOError, TypeError) as e:
logger.error(f"Failed to write cache file {cache_file}: {e}")
def invalidate(self, key: str) -> None:
"""Remove cache entry (thread-safe).
Args:
key: Cache key to invalidate
"""
with self._lock:
# Remove from memory cache
if key in self._memory_cache:
del self._memory_cache[key]
# Remove from file cache
cache_file = self._get_cache_file(key)
if cache_file.exists():
cache_file.unlink(missing_ok=True)
logger.debug(f"Invalidated cache for key: {key}")
def invalidate_file(self, file_path: Path) -> int:
"""Invalidate all cache entries for a specific file path.
This invalidates all extractor method caches for the given file by:
1. Clearing matching keys from memory cache
2. Removing matching keys from file cache
Args:
file_path: File path to invalidate cache for
Returns:
Number of cache entries invalidated
"""
with self._lock:
# Generate the path hash used in cache keys
path_hash = hashlib.md5(str(file_path).encode()).hexdigest()[:12]
prefix = f"extractor_{path_hash}_"
invalidated_count = 0
# Remove from memory cache (easy - just check prefix)
keys_to_remove = [k for k in self._memory_cache.keys() if k.startswith(prefix)]
for key in keys_to_remove:
del self._memory_cache[key]
invalidated_count += 1
logger.debug(f"Invalidated memory cache for key: {key}")
# For file cache, we need to invalidate all known extractor methods
# List of all cached extractor methods
extractor_methods = [
'extract_title', 'extract_year', 'extract_source', 'extract_video_codec',
'extract_audio_codec', 'extract_frame_class', 'extract_hdr', 'extract_order',
'extract_special_info', 'extract_movie_db', 'extract_extension',
'extract_video_tracks', 'extract_audio_tracks', 'extract_subtitle_tracks',
'extract_interlaced', 'extract_size', 'extract_duration', 'extract_bitrate',
'extract_created', 'extract_modified'
]
# Invalidate each possible cache key
for method in extractor_methods:
cache_key = f"extractor_{path_hash}_{method}"
cache_file = self._get_cache_file(cache_key)
if cache_file.exists():
cache_file.unlink(missing_ok=True)
invalidated_count += 1
logger.debug(f"Invalidated file cache for key: {cache_key}")
logger.info(f"Invalidated {invalidated_count} cache entries for file: {file_path.name}")
return invalidated_count
def get_image(self, key: str) -> Optional[Path]:
"""Get cached image path if not expired (thread-safe).
Args:
key: Cache key
Returns:
Path to cached image or None if not found/expired
"""
with self._lock:
cache_file = self._get_cache_file(key)
if not cache_file.exists():
return None
try:
with open(cache_file, 'r') as f:
data = json.load(f)
if time.time() > data.get('expires', 0):
# Expired, remove file and image
image_path = data.get('image_path')
if image_path and Path(image_path).exists():
Path(image_path).unlink(missing_ok=True)
cache_file.unlink(missing_ok=True)
logger.debug(f"Image cache expired for key: {key}")
return None
image_path = data.get('image_path')
if image_path and Path(image_path).exists():
return Path(image_path)
else:
logger.warning(f"Image path in cache but file missing: {image_path}")
return None
except (json.JSONDecodeError, IOError) as e:
logger.warning(f"Failed to read image cache {cache_file}: {e}")
cache_file.unlink(missing_ok=True)
return None
def set_image(self, key: str, image_data: bytes, ttl_seconds: int) -> Optional[Path]:
"""Set cached image and return path (thread-safe).
Args:
key: Cache key
image_data: Image binary data
ttl_seconds: Time-to-live in seconds
Returns:
Path to saved image or None if failed
"""
with self._lock:
# Determine subdirectory for image storage
if key.startswith("poster_"):
subdir = "posters"
subkey = key[7:]
else:
subdir = "images"
subkey = key
# Create image directory
image_dir = self.cache_dir / subdir
image_dir.mkdir(parents=True, exist_ok=True)
# Hash for filename
key_hash = hashlib.md5(subkey.encode('utf-8')).hexdigest()
image_path = image_dir / f"{key_hash}.jpg"
try:
# Write image data
with open(image_path, 'wb') as f:
f.write(image_data)
# Cache metadata
data = {
'image_path': str(image_path),
'expires': time.time() + ttl_seconds
}
cache_file = self._get_cache_file(key)
with open(cache_file, 'w') as f:
json.dump(data, f, indent=2)
logger.debug(f"Cached image for key: {key} at {image_path} (TTL: {ttl_seconds}s)")
return image_path
except IOError as e:
logger.error(f"Failed to cache image for key {key}: {e}")
return None
def get_object(self, key: str) -> Optional[Any]:
"""Get pickled object from cache if not expired (thread-safe).
Note: This uses a separate .pkl file format for objects that can't be JSON-serialized.
Args:
key: Cache key
Returns:
Cached object or None if not found/expired
"""
with self._lock:
# Check memory cache first
if key in self._memory_cache:
data = self._memory_cache[key]
if time.time() <= data.get('expires', 0):
return data.get('value')
else:
del self._memory_cache[key]
logger.debug(f"Memory cache expired for pickled object: {key}")
# Get cache file path but change extension to .pkl
cache_file = self._get_cache_file(key).with_suffix('.pkl')
if not cache_file.exists():
return None
try:
with open(cache_file, 'rb') as f:
data = pickle.load(f)
if time.time() > data.get('expires', 0):
# Expired, remove file
cache_file.unlink(missing_ok=True)
logger.debug(f"Pickled cache expired for key: {key}")
return None
# Store in memory cache
self._memory_cache[key] = data
return data.get('value')
except (pickle.PickleError, IOError) as e:
# Corrupted or read error, remove
logger.warning(f"Corrupted pickle cache {cache_file}: {e}")
cache_file.unlink(missing_ok=True)
return None
def set_object(self, key: str, obj: Any, ttl_seconds: int) -> None:
    """Pickle *obj* and store it under *key* with a TTL (thread-safe).

    Uses the pickle on-disk format for objects that can't be
    JSON-serialized.

    Args:
        key: Cache key
        obj: Object to cache (must be picklable)
        ttl_seconds: Time-to-live in seconds
    """
    with self._lock:
        entry = {
            'value': obj,
            'expires': time.time() + ttl_seconds,
        }
        # Memory cache is updated even if the disk write later fails,
        # so the value is still served for this process's lifetime.
        self._memory_cache[key] = entry
        pkl_path = self._get_cache_file(key).with_suffix('.pkl')
        try:
            with open(pkl_path, 'wb') as fh:
                pickle.dump(entry, fh)
            logger.debug(f"Cached pickled object for key: {key} (TTL: {ttl_seconds}s)")
        except (IOError, pickle.PickleError) as e:
            logger.error(f"Failed to cache pickled object {pkl_path}: {e}")
def clear_expired(self) -> int:
    """Delete every expired entry from memory and from disk.

    Returns:
        Number of entries removed
    """
    with self._lock:
        now = time.time()
        removed = 0
        # Purge stale in-memory entries first.
        stale_keys = [k for k, v in self._memory_cache.items()
                      if now > v.get('expires', 0)]
        for k in stale_keys:
            del self._memory_cache[k]
            removed += 1
        # Then sweep the on-disk cache; both JSON and pickle formats.
        for path in self.cache_dir.rglob('*'):
            if not path.is_file() or path.suffix not in ('.json', '.pkl'):
                continue
            try:
                if path.suffix == '.json':
                    with open(path, 'r') as fh:
                        payload = json.load(fh)
                else:  # .pkl
                    with open(path, 'rb') as fh:
                        payload = pickle.load(fh)
                if now > payload.get('expires', 0):
                    path.unlink(missing_ok=True)
                    removed += 1
            except (json.JSONDecodeError, pickle.PickleError, IOError):
                # Unreadable cache file - treat as garbage and delete it.
                path.unlink(missing_ok=True)
                removed += 1
        logger.info(f"Cleared {removed} expired cache entries")
        return removed

304
renamer/cache/decorators.py vendored Normal file
View File

@@ -0,0 +1,304 @@
"""Cache decorators for easy method caching.
Provides decorators that can be applied to methods for automatic caching
with different strategies.
"""
from functools import wraps
from pathlib import Path
from typing import Callable, Optional, Any
import logging
import json
from .strategies import (
CacheKeyStrategy,
FilepathMethodStrategy,
APIRequestStrategy,
SimpleKeyStrategy
)
logger = logging.getLogger(__name__)
# Sentinel object to distinguish "not in cache" from "cached value is None"
_CACHE_MISS = object()
def cached(
    strategy: Optional[CacheKeyStrategy] = None,
    ttl: Optional[int] = None,
    key_prefix: Optional[str] = None
):
    """Generic cache decorator with a pluggable key-generation strategy.

    Args:
        strategy: Cache key generation strategy (defaults to FilepathMethodStrategy)
        ttl: Time-to-live in seconds (defaults to settings value or 21600)
        key_prefix: Optional prefix for cache key

    Returns:
        Decorated function with caching

    Usage:
        @cached(strategy=APIRequestStrategy(), ttl=21600)
        def fetch_tmdb_data(self, movie_id): ...

        @cached(ttl=7200)  # FilepathMethodStrategy by default
        def extract_year(self): ...

    Note:
        The decorated method's instance must expose a ``cache`` attribute;
        without one the wrapped function simply runs uncached.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            cache = getattr(self, 'cache', None)
            if not cache:
                logger.debug(f"No cache found on {self.__class__.__name__}, executing uncached")
                return func(self, *args, **kwargs)
            key_strategy = strategy if strategy is not None else FilepathMethodStrategy()
            try:
                cache_key = _generate_cache_key(
                    key_strategy, self, func, args, kwargs, key_prefix
                )
            except Exception as e:
                # Key generation failure must never break the call itself.
                logger.warning(f"Failed to generate cache key: {e}, executing uncached")
                return func(self, *args, **kwargs)
            # Sentinel distinguishes "not in cache" from a cached None value.
            hit = cache.get(cache_key, _CACHE_MISS)
            if hit is not _CACHE_MISS:
                logger.debug(f"Cache hit for {func.__name__}: {cache_key} (value={hit!r})")
                return hit
            logger.debug(f"Cache miss for {func.__name__}: {cache_key}")
            result = func(self, *args, **kwargs)
            actual_ttl = _determine_ttl(self, ttl)
            # None is cached too - it is valid data meaning "not found".
            cache.set(cache_key, result, actual_ttl)
            logger.debug(f"Cached {func.__name__}: {cache_key} (TTL: {actual_ttl}s, value={result!r})")
            return result
        return wrapper
    return decorator
def _generate_cache_key(
    strategy: CacheKeyStrategy,
    instance: Any,
    func: Callable,
    args: tuple,
    kwargs: dict,
    key_prefix: Optional[str]
) -> str:
    """Dispatch to the right key-building scheme for *strategy*.

    Args:
        strategy: Cache key strategy
        instance: Instance the method is called on
        func: Function being cached
        args: Positional arguments
        kwargs: Keyword arguments
        key_prefix: Optional key prefix

    Returns:
        Generated cache key

    Raises:
        ValueError: FilepathMethodStrategy requires a ``file_path`` attribute.
    """
    if isinstance(strategy, FilepathMethodStrategy):
        # Extractor pattern: key on file_path + method name only, so
        # different extractor instances share entries for the same file.
        file_path = getattr(instance, 'file_path', None)
        if not file_path:
            raise ValueError(f"{instance.__class__.__name__} missing file_path attribute")
        return strategy.generate_key(file_path, func.__name__)
    if isinstance(strategy, APIRequestStrategy):
        # API pattern: service/url/params taken positionally when present.
        if args:
            service = str(args[0]) if len(args) >= 1 else func.__name__
            url = str(args[1]) if len(args) >= 2 else ""
            params = args[2] if len(args) >= 3 else kwargs
        else:
            service, url, params = func.__name__, "", kwargs
        return strategy.generate_key(service, url, params)
    if isinstance(strategy, SimpleKeyStrategy):
        # Simple pattern: prefix + first positional arg (or 'id' kwarg).
        prefix = key_prefix if key_prefix else func.__name__
        identifier = str(args[0]) if args else str(kwargs.get('id', 'default'))
        return strategy.generate_key(prefix, identifier)
    # Custom strategy: hand over the instance and the raw call arguments.
    return strategy.generate_key(instance, *args, **kwargs)
def _determine_ttl(instance: Any, ttl: Optional[int]) -> int:
"""Determine TTL from explicit value or instance settings.
Args:
instance: Instance the method is called on
ttl: Explicit TTL value (takes precedence)
Returns:
TTL in seconds
"""
if ttl is not None:
return ttl
# Try to get from settings
settings = getattr(instance, 'settings', None)
if settings:
return settings.get('cache_ttl_extractors', 21600)
# Default to 6 hours
return 21600
def cached_method(ttl: Optional[int] = None):
    """Convenience decorator for extractor methods.

    Shorthand for ``cached(strategy=FilepathMethodStrategy(), ttl=ttl)``,
    kept for backward compatibility with existing code.

    Args:
        ttl: Time-to-live in seconds

    Returns:
        Decorated function

    Usage:
        @cached_method(ttl=3600)
        def extract_title(self):
            return title
    """
    return cached(strategy=FilepathMethodStrategy(), ttl=ttl)
def cached_api(service: str, ttl: Optional[int] = None):
    """Decorator for caching API responses.

    Cache keys are built from the service name, the function name and
    every positional/keyword argument, so identical requests hit the cache.

    Args:
        service: Service name (e.g., "tmdb", "imdb", "omdb")
        ttl: Time-to-live in seconds (defaults to cache_ttl_{service})

    Returns:
        Decorated function

    Usage:
        @cached_api("tmdb", ttl=21600)
        def search_movie(self, title, year=None):
            return requests.get(...).json()
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            cache = getattr(self, 'cache', None)
            if not cache:
                logger.debug(f"No cache on {self.__class__.__name__}, executing uncached")
                return func(self, *args, **kwargs)
            # Serialize all call arguments deterministically for the key.
            args_repr = json.dumps({
                'args': [str(a) for a in args],
                'kwargs': {k: str(v) for k, v in sorted(kwargs.items())}
            }, sort_keys=True)
            cache_key = APIRequestStrategy().generate_key(
                service, func.__name__, {'params': args_repr}
            )
            # Sentinel distinguishes "not in cache" from a cached None value.
            hit = cache.get(cache_key, _CACHE_MISS)
            if hit is not _CACHE_MISS:
                logger.debug(f"API cache hit for {service}.{func.__name__} (value={hit!r})")
                return hit
            logger.debug(f"API cache miss for {service}.{func.__name__}")
            result = func(self, *args, **kwargs)
            # TTL precedence: explicit > cache_ttl_{service} > cache_ttl_api > 6h.
            effective_ttl = ttl
            if effective_ttl is None:
                settings = getattr(self, 'settings', None)
                if settings:
                    effective_ttl = settings.get(f'cache_ttl_{service}',
                                                 settings.get('cache_ttl_api', 21600))
                else:
                    effective_ttl = 21600
            # None is cached too - it is valid data.
            cache.set(cache_key, result, effective_ttl)
            logger.debug(f"API cached {service}.{func.__name__} (TTL: {effective_ttl}s, value={result!r})")
            return result
        return wrapper
    return decorator
def cached_property(ttl: Optional[int] = None):
    """Decorator for caching property-like methods.

    Similar to ``@property`` but with caching support. Unlike a real
    property, the call site still needs parentheses: ``obj.metadata()``.

    Args:
        ttl: Time-to-live in seconds

    Returns:
        Decorated function

    Usage:
        @cached_property(ttl=3600)
        def metadata(self):
            return complex_metadata
    """
    return cached(strategy=FilepathMethodStrategy(), ttl=ttl)

241
renamer/cache/managers.py vendored Normal file
View File

@@ -0,0 +1,241 @@
"""Cache management and operations.
Provides high-level cache management functionality including
clearing, statistics, and maintenance operations.
"""
from pathlib import Path
from typing import Dict, Any, Optional
import logging
import time
import json
import pickle
from .types import CacheStats
logger = logging.getLogger(__name__)
class CacheManager:
    """High-level cache management and operations.

    Wraps a core ``Cache`` instance and provides maintenance operations:
    bulk clearing, per-prefix/per-file invalidation, statistics, entry-age
    queries and directory housekeeping.
    """

    def __init__(self, cache):
        """Initialize manager with cache instance.

        Args:
            cache: Core Cache instance
        """
        self.cache = cache

    def clear_all(self) -> int:
        """Clear all cache entries (files and memory).

        Returns:
            Number of entries removed
        """
        count = 0
        # Remove every file in the cache directory tree.
        for cache_file in self.cache.cache_dir.rglob('*'):
            if cache_file.is_file():
                try:
                    cache_file.unlink()
                    count += 1
                except (OSError, PermissionError) as e:
                    logger.warning(f"Failed to remove {cache_file}: {e}")
        # Drop the in-memory cache under the cache's lock.
        with self.cache._lock:
            mem_count = len(self.cache._memory_cache)
            self.cache._memory_cache.clear()
            count += mem_count
        logger.info(f"Cleared all cache: {count} entries removed")
        return count

    def clear_by_prefix(self, prefix: str) -> int:
        """Clear cache entries matching prefix.

        Args:
            prefix: Cache key prefix (e.g., "tmdb", "extractor", "poster")

        Returns:
            Number of entries removed

        Examples:
            clear_by_prefix("tmdb_")       # Clear all TMDB cache
            clear_by_prefix("extractor_")  # Clear all extractor cache
        """
        count = 0
        # Prefixes map to subdirectories without the trailing underscore.
        subdir = prefix.rstrip('_')
        cache_subdir = self.cache.cache_dir / subdir
        if cache_subdir.exists():
            for cache_file in cache_subdir.rglob('*'):
                if cache_file.is_file():
                    try:
                        cache_file.unlink()
                        count += 1
                    except (OSError, PermissionError) as e:
                        logger.warning(f"Failed to remove {cache_file}: {e}")
        # Also drop matching keys from the memory cache.
        with self.cache._lock:
            keys_to_remove = [k for k in self.cache._memory_cache.keys()
                              if k.startswith(prefix)]
            for key in keys_to_remove:
                del self.cache._memory_cache[key]
                count += 1
        logger.info(f"Cleared cache with prefix '{prefix}': {count} entries removed")
        return count

    def clear_expired(self) -> int:
        """Clear all expired cache entries.

        Delegates to Cache.clear_expired() for implementation.

        Returns:
            Number of expired entries removed
        """
        return self.cache.clear_expired()

    def get_stats(self) -> CacheStats:
        """Get comprehensive cache statistics.

        Returns:
            Dictionary with cache statistics including:
                - cache_dir: Path to cache directory
                - subdirs: Per-subdirectory statistics
                - total_files: Total number of cached files
                - total_size_bytes: Total size in bytes
                - total_size_mb: Total size in megabytes
                - memory_cache_entries: Number of in-memory entries
        """
        stats: CacheStats = {
            'cache_dir': str(self.cache.cache_dir),
            'subdirs': {},
            'total_files': 0,
            'total_size_bytes': 0,
            'total_size_mb': 0.0,
            'memory_cache_entries': len(self.cache._memory_cache)
        }
        if self.cache.cache_dir.exists():
            for subdir in self.cache.cache_dir.iterdir():
                if subdir.is_dir():
                    file_list = [f for f in subdir.rglob('*') if f.is_file()]
                    file_count = len(file_list)
                    size = sum(f.stat().st_size for f in file_list)
                    stats['subdirs'][subdir.name] = {
                        'files': file_count,
                        'size_bytes': size,
                        'size_mb': round(size / (1024 * 1024), 2)
                    }
                    stats['total_files'] += file_count
                    stats['total_size_bytes'] += size
        stats['total_size_mb'] = round(stats['total_size_bytes'] / (1024 * 1024), 2)
        return stats

    def clear_file_cache(self, file_path: Path) -> int:
        """Clear all cache entries for a specific file.

        Useful when file is renamed, moved, or modified.
        Removes all extractor cache entries associated with the file.

        Args:
            file_path: Path to file whose cache should be cleared

        Returns:
            Number of entries removed

        Example:
            After renaming a file, clear its old cache:
            manager.clear_file_cache(old_path)
        """
        count = 0
        import hashlib
        # Must match the hash used by FilepathMethodStrategy.generate_key.
        path_hash = hashlib.md5(str(file_path).encode()).hexdigest()[:12]
        extractor_dir = self.cache.cache_dir / "extractors"
        if extractor_dir.exists():
            for cache_file in extractor_dir.rglob('*'):
                if cache_file.is_file() and path_hash in cache_file.name:
                    try:
                        cache_file.unlink()
                        count += 1
                    except (OSError, PermissionError) as e:
                        logger.warning(f"Failed to remove {cache_file}: {e}")
        with self.cache._lock:
            keys_to_remove = [k for k in self.cache._memory_cache.keys()
                              if path_hash in k]
            for key in keys_to_remove:
                del self.cache._memory_cache[key]
                count += 1
        logger.info(f"Cleared cache for file {file_path}: {count} entries removed")
        return count

    def get_cache_age(self, key: str) -> Optional[float]:
        """Get the age of a cache entry in seconds.

        Fix: the previous implementation computed ``now - (expires - ttl)``,
        but entries never store a ``ttl`` field, so the result was negative
        (-> None) for every live entry. Entries are written once, so the
        cache file's modification time is the entry's creation time; use it.

        Args:
            key: Cache key

        Returns:
            Age in seconds, or None if not cached
        """
        cache_file = self.cache._get_cache_file(key)
        if not cache_file.exists():
            # The entry may have been stored in the pickle format instead.
            cache_file = cache_file.with_suffix('.pkl')
            if not cache_file.exists():
                return None
        try:
            return max(0.0, time.time() - cache_file.stat().st_mtime)
        except OSError:
            # File vanished between exists() and stat(), or is unreadable.
            return None

    def compact_cache(self) -> int:
        """Remove empty subdirectories and organize cache.

        Directories are visited deepest-first (reverse-sorted paths put
        children before parents) so a chain of nested empty directories
        is removed in a single pass.

        Returns:
            Number of empty directories removed
        """
        count = 0
        if self.cache.cache_dir.exists():
            for subdir in sorted(self.cache.cache_dir.rglob('*'), reverse=True):
                if subdir.is_dir():
                    try:
                        subdir.rmdir()  # only succeeds when the dir is empty
                        count += 1
                        logger.debug(f"Removed empty directory: {subdir}")
                    except OSError:
                        # Directory not empty or other error - keep it.
                        pass
        logger.info(f"Compacted cache: removed {count} empty directories")
        return count

152
renamer/cache/strategies.py vendored Normal file
View File

@@ -0,0 +1,152 @@
"""Cache key generation strategies.
Provides different strategies for generating cache keys based on use case.
"""
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, Optional, Callable
import hashlib
import json
import logging
logger = logging.getLogger(__name__)
class CacheKeyStrategy(ABC):
    """Abstract interface for cache key generation strategies."""

    @abstractmethod
    def generate_key(self, *args, **kwargs) -> str:
        """Build and return a cache key string from the given arguments.

        Returns:
            Cache key string
        """
        ...
class FilepathMethodStrategy(CacheKeyStrategy):
    """Key = filepath hash + method name.

    Format: ``extractor_{md5(filepath)[:12]}_{method_name}``
    Usage: extractor methods that operate on a single file.

    Examples:
        extractor_a1b2c3d4e5f6_extract_title
        extractor_a1b2c3d4e5f6_12345_extract_year (with instance_id)
    """

    def generate_key(
        self,
        file_path: Path,
        method_name: str,
        instance_id: str = ""
    ) -> str:
        """Generate cache key from file path and method name.

        Args:
            file_path: Path to the file being processed
            method_name: Name of the method being cached
            instance_id: Optional instance identifier for uniqueness

        Returns:
            Cache key string
        """
        # MD5 of the path keeps keys a fixed, filesystem-safe length.
        digest = hashlib.md5(str(file_path).encode()).hexdigest()[:12]
        parts = ["extractor", digest]
        if instance_id:
            parts.append(instance_id)
        parts.append(method_name)
        return "_".join(parts)
class APIRequestStrategy(CacheKeyStrategy):
    """Key = service name + hash of URL and request parameters.

    Format: ``api_{service}_{md5(url + params)[:12]}``
    Usage: API responses (TMDB, IMDB, etc.)

    Examples:
        api_tmdb_a1b2c3d4e5f6
        api_imdb_b2c3d4e5f6a1
    """

    def generate_key(
        self,
        service: str,
        url: str,
        params: Optional[Dict] = None
    ) -> str:
        """Generate cache key from API request parameters.

        Args:
            service: Service name (e.g., "tmdb", "imdb")
            url: API endpoint URL or path
            params: Optional request parameters dictionary

        Returns:
            Cache key string
        """
        # sort_keys makes equal parameter dicts serialize (and hash) equally.
        payload = f"{url}{json.dumps(params or {}, sort_keys=True)}"
        return f"api_{service}_{hashlib.md5(payload.encode()).hexdigest()[:12]}"
class SimpleKeyStrategy(CacheKeyStrategy):
    """Key = simple string prefix + sanitized identifier.

    Format: ``{prefix}_{identifier}``
    Usage: posters, images, simple data.

    Examples:
        poster_movie_12345
        image_actor_67890
    """

    def generate_key(self, prefix: str, identifier: str) -> str:
        """Generate cache key from prefix and identifier.

        Args:
            prefix: Key prefix (e.g., "poster", "image")
            identifier: Unique identifier

        Returns:
            Cache key string
        """
        # Strip path separators and traversal sequences so the key is
        # safe to embed in a filename.
        safe_id = identifier.replace('/', '_').replace('\\', '_').replace('..', '_')
        return f"{prefix}_{safe_id}"
class CustomStrategy(CacheKeyStrategy):
    """Delegates key generation to a user-supplied callable.

    Usage: special cases requiring custom logic.

    Example:
        def my_key_generator(obj, *args):
            return f"custom_{obj.id}_{args[0]}"

        strategy = CustomStrategy(my_key_generator)
    """

    def __init__(self, key_func: Callable[..., str]):
        """Store the custom key generation function.

        Args:
            key_func: Callable that returns a cache key string
        """
        self.key_func = key_func

    def generate_key(self, *args, **kwargs) -> str:
        """Generate cache key using the stored callable.

        Returns:
            Cache key string from the custom function
        """
        return self.key_func(*args, **kwargs)

33
renamer/cache/types.py vendored Normal file
View File

@@ -0,0 +1,33 @@
"""Type definitions for cache subsystem."""
from typing import TypedDict, Any, Dict
class CacheEntry(TypedDict):
    """Type definition for cache entry structure.

    This shape is shared by the JSON and pickle on-disk formats and by
    the in-memory cache.

    Attributes:
        value: The cached value (any JSON-serializable type)
        expires: Unix timestamp when entry expires
    """
    value: Any        # payload; None is a legitimate cached value
    expires: float    # absolute expiry time, seconds since the epoch
class CacheStats(TypedDict):
    """Type definition for cache statistics.

    Attributes:
        cache_dir: Path to cache directory
        subdirs: Statistics for each subdirectory
        total_files: Total number of cache files
        total_size_bytes: Total size in bytes
        total_size_mb: Total size in megabytes
        memory_cache_entries: Number of entries in memory cache
    """
    cache_dir: str                      # absolute path, stringified
    subdirs: Dict[str, Dict[str, Any]]  # per-subdirectory file/size counters
    total_files: int
    total_size_bytes: int
    total_size_mb: float                # rounded to 2 decimal places
    memory_cache_entries: int

View File

@@ -1,199 +0,0 @@
# Supported video container formats, keyed by file extension.
MEDIA_TYPES = {
    "mkv": {
        "description": "Matroska multimedia container",
        "meta_type": "Matroska",
        "mime": "video/x-matroska",
    },
    "mk3d": {
        "description": "Matroska 3D multimedia container",
        "meta_type": "Matroska",
        "mime": "video/x-matroska",
    },
    "avi": {
        "description": "Audio Video Interleave",
        "meta_type": "AVI",
        "mime": "video/x-msvideo",
    },
    "mov": {
        "description": "QuickTime movie",
        "meta_type": "QuickTime",
        "mime": "video/quicktime",
    },
    "mp4": {
        "description": "MPEG-4 video container",
        "meta_type": "MP4",
        "mime": "video/mp4",
    },
    "wmv": {
        "description": "Windows Media Video",
        "meta_type": "ASF",
        "mime": "video/x-ms-wmv",
    },
    "flv": {"description": "Flash Video", "meta_type": "FLV", "mime": "video/x-flv"},
    "webm": {
        "description": "WebM multimedia",
        "meta_type": "WebM",
        "mime": "video/webm",
    },
    "m4v": {"description": "MPEG-4 video", "meta_type": "MP4", "mime": "video/mp4"},
    "3gp": {"description": "3GPP multimedia", "meta_type": "MP4", "mime": "video/3gpp"},
    "ogv": {"description": "Ogg Video", "meta_type": "Ogg", "mime": "video/ogg"},
}
# Canonical source names mapped to the spelling variants seen in filenames.
SOURCE_DICT = {
    "WEB-DL": ["WEB-DL", "WEBRip", "WEB-Rip", "WEB", "WEB-DLRip"],
    "BDRip": ["BDRip", "BD-Rip", "BDRIP"],
    "BDRemux": ["BDRemux", "BD-Remux", "BDREMUX"],
    "DVDRip": ["DVDRip", "DVD-Rip", "DVDRIP"],
    "HDTVRip": ["HDTVRip", "HDTV"],
    "BluRay": ["BluRay", "BLURAY", "Blu-ray"],
    "SATRip": ["SATRip", "SAT-Rip", "SATRIP"],
    "VHSRecord": [
        "VHSRecord",
        "VHS Record",
        "VHS-Rip",
        "VHSRip",
        "VHS",
        "VHS Tape",
        "VHS-Tape",
    ],
}
# Resolution frame classes: nominal height plus the widths commonly
# paired with it, used to classify a video's quality label.
FRAME_CLASSES = {
    "480p": {
        "nominal_height": 480,
        "typical_widths": [640, 704, 720],
        "description": "Standard Definition (SD) - DVD quality",
    },
    "480i": {
        "nominal_height": 480,
        "typical_widths": [640, 704, 720],
        "description": "Standard Definition (SD) interlaced - NTSC quality",
    },
    "360p": {
        "nominal_height": 360,
        "typical_widths": [480, 640],
        "description": "Low Definition (LD) - 360p",
    },
    "576p": {
        "nominal_height": 576,
        "typical_widths": [720, 768],
        "description": "PAL Standard Definition (SD) - European DVD quality",
    },
    "576i": {
        "nominal_height": 576,
        "typical_widths": [720, 768],
        "description": "PAL Standard Definition (SD) interlaced - European quality",
    },
    "720p": {
        "nominal_height": 720,
        "typical_widths": [1280],
        "description": "High Definition (HD) - 720p HD",
    },
    "1080p": {
        "nominal_height": 1080,
        "typical_widths": [1920],
        "description": "Full High Definition (FHD) - 1080p HD",
    },
    "1080i": {
        "nominal_height": 1080,
        "typical_widths": [1920],
        "description": "Full High Definition (FHD) interlaced - 1080i HD",
    },
    "1440p": {
        "nominal_height": 1440,
        "typical_widths": [2560],
        "description": "Quad High Definition (QHD) - 1440p 2K",
    },
    "2160p": {
        "nominal_height": 2160,
        "typical_widths": [3840],
        "description": "Ultra High Definition (UHD) - 2160p 4K",
    },
    "4320p": {
        "nominal_height": 4320,
        "typical_widths": [7680],
        "description": "Ultra High Definition (UHD) - 4320p 8K",
    },
}
# Online movie databases and the id-prefix patterns used in filenames.
MOVIE_DB_DICT = {
    "tmdb": {
        "name": "The Movie Database (TMDb)",
        "description": "Community built movie and TV database",
        "url": "https://www.themoviedb.org/",
        "patterns": ["tmdbid", "tmdb", "tmdbid-", "tmdb-"],
    },
    "imdb": {
        "name": "Internet Movie Database (IMDb)",
        "description": "Comprehensive movie, TV, and celebrity database",
        "url": "https://www.imdb.com/",
        "patterns": ["imdbid", "imdb", "imdbid-", "imdb-"],
    },
    "trakt": {
        "name": "Trakt.tv",
        "description": "Service that integrates with media centers for scrobbling",
        "url": "https://trakt.tv/",
        "patterns": ["traktid", "trakt", "traktid-", "trakt-"],
    },
    "tvdb": {
        "name": "The TV Database (TVDB)",
        "description": "Community driven TV database",
        "url": "https://thetvdb.com/",
        "patterns": ["tvdbid", "tvdb", "tvdbid-", "tvdb-"],
    },
}
# Canonical special-edition names mapped to their filename aliases.
SPECIAL_EDITIONS = {
    "Theatrical Cut": ["Theatrical Cut"],
    "Director's Cut": ["Director's Cut", "Director Cut"],
    "Extended Edition": ["Extended Edition", "Ultimate Extended Edition"],
    "Special Edition": ["Special Edition"],
    "Collector's Edition": ["Collector's Edition"],
    "Criterion Collection": ["Criterion Collection"],
    "Anniversary Edition": ["Anniversary Edition"],
    "Redux": ["Redux"],
    "Final Cut": ["Final Cut"],
    "Alternate Cut": ["Alternate Cut"],
    "International Cut": ["International Cut"],
    "Restored Edition": [
        "Restored Edition",
        "Restored Version",
        "4K Restoration",
        "Restoration",
    ],
    "Remastered": ["Remastered", "Remaster", "HD Remaster"],
    "Unrated": ["Unrated"],
    "Uncensored": ["Uncensored"],
    "Definitive Edition": ["Definitive Edition"],
    "Platinum Edition": ["Platinum Edition"],
    "Gold Edition": ["Gold Edition"],
    "Diamond Edition": ["Diamond Edition"],
    "Steelbook Edition": ["Steelbook Edition"],
    "Limited Edition": ["Limited Edition"],
    "Deluxe Edition": ["Deluxe Edition"],
    "Premium Edition": ["Premium Edition"],
    "Complete Edition": ["Complete Edition"],
    "AI Remaster": ["AI Remaster", "AI Remastered"],
    "Upscaled": [
        "AI Upscaled",
        "AI Enhanced",
        "AI Upscale",
        "Upscaled",
        "Upscale",
        "Upscaling",
    ],
    "Director's Definitive Cut": ["Director's Definitive Cut"],
    "Extended Director's Cut": ["Extended Director's Cut", "Ultimate Director's Cut"],
    "Original Cut": ["Original Cut"],
    "Cinematic Cut": ["Cinematic Cut"],
    "Roadshow Cut": ["Roadshow Cut"],
    "Premiere Cut": ["Premiere Cut"],
    "Festival Cut": ["Festival Cut"],
    "Workprint": ["Workprint"],
    "Rough Cut": ["Rough Cut"],
    "Special Assembly Cut": ["Special Assembly Cut"],
    "Amazon Edition": ["Amazon Edition", "Amazon", "AMZN"],
    "Netflix Edition": ["Netflix Edition"],
    "HBO Edition": ["HBO Edition"],
}

View File

@@ -0,0 +1,51 @@
"""Constants package for Renamer.
This package contains constants split into logical modules:
- media_constants.py: Media type definitions (MEDIA_TYPES)
- source_constants.py: Video source types (SOURCE_DICT)
- frame_constants.py: Resolution/frame classes (FRAME_CLASSES)
- moviedb_constants.py: Movie database identifiers (MOVIE_DB_DICT)
- edition_constants.py: Special edition types (SPECIAL_EDITIONS)
- lang_constants.py: Language-related constants (SKIP_WORDS)
- year_constants.py: Year validation (CURRENT_YEAR, MIN_VALID_YEAR, etc.)
- cyrillic_constants.py: Cyrillic character normalization (CYRILLIC_TO_ENGLISH)
"""
# Import from all constant modules
from .media_constants import (
MEDIA_TYPES,
META_TYPE_TO_EXTENSIONS,
get_extension_from_format
)
from .source_constants import SOURCE_DICT
from .frame_constants import FRAME_CLASSES, NON_STANDARD_QUALITY_INDICATORS
from .moviedb_constants import MOVIE_DB_DICT
from .edition_constants import SPECIAL_EDITIONS
from .lang_constants import SKIP_WORDS
from .year_constants import CURRENT_YEAR, MIN_VALID_YEAR, YEAR_FUTURE_BUFFER, is_valid_year
from .cyrillic_constants import CYRILLIC_TO_ENGLISH
# Explicit public API of the constants package (governs `from ... import *`
# and documents which names the submodules are expected to provide).
__all__ = [
    # Media types
    'MEDIA_TYPES',
    'META_TYPE_TO_EXTENSIONS',
    'get_extension_from_format',
    # Source types
    'SOURCE_DICT',
    # Frame classes
    'FRAME_CLASSES',
    'NON_STANDARD_QUALITY_INDICATORS',
    # Movie databases
    'MOVIE_DB_DICT',
    # Special editions
    'SPECIAL_EDITIONS',
    # Language constants
    'SKIP_WORDS',
    # Year validation
    'CURRENT_YEAR',
    'MIN_VALID_YEAR',
    'YEAR_FUTURE_BUFFER',
    'is_valid_year',
    # Cyrillic normalization
    'CYRILLIC_TO_ENGLISH',
]

View File

@@ -0,0 +1,21 @@
"""Cyrillic character normalization constants.
This module contains mappings for normalizing Cyrillic characters to their
English equivalents for parsing filenames.
"""
# Cyrillic to English character mappings.
# Used for normalizing Cyrillic homoglyphs - characters that render
# identically to Latin letters - so mixed-alphabet filenames parse
# consistently.
CYRILLIC_TO_ENGLISH = {
    'р': 'p',  # Cyrillic 'er' looks like Latin 'p'
    'і': 'i',  # Cyrillic 'i' looks like Latin 'i'
    'о': 'o',  # Cyrillic 'o' looks like Latin 'o'
    'с': 'c',  # Cyrillic 'es' looks like Latin 'c'
    'е': 'e',  # Cyrillic 'ie' looks like Latin 'e'
    'а': 'a',  # Cyrillic 'a' looks like Latin 'a'
    'т': 't',  # Cyrillic 'te' looks like Latin 't'
    'у': 'y',  # Cyrillic 'u' looks like Latin 'y'
    'к': 'k',  # Cyrillic 'ka' looks like Latin 'k'
    'х': 'x',  # Cyrillic 'ha' looks like Latin 'x'
    # Add more mappings as needed
}

View File

@@ -0,0 +1,62 @@
"""Special edition constants.
This module defines special edition types (Director's Cut, Extended Edition, etc.)
and their aliases for detection in filenames.
"""
# Canonical special-edition names mapped to the alias spellings that may
# appear in filenames; detection matches any alias, output uses the key.
SPECIAL_EDITIONS = {
    "Theatrical Cut": ["Theatrical Cut", "Theatrical Reconstruction"],
    "Director's Cut": ["Director's Cut", "Director Cut"],
    "Extended Cut": ["Extended Cut", "Ultimate Extended Cut", "Extended Edition", "Ultimate Extended Edition"],
    "Special Edition": ["Special Edition"],
    "Open Matte": ["Open Matte"],
    "Collector's Edition": ["Collector's Edition"],
    "Criterion Collection": ["Criterion Collection"],
    "Anniversary Edition": ["Anniversary Edition"],
    "Redux": ["Redux"],
    "Final Cut": ["Final Cut"],
    "Alternate Cut": ["Alternate Cut"],
    "International Cut": ["International Cut"],
    "Restored Edition": [
        "Restored Edition",
        "Restored Version",
        "4K Restoration",
        "Restoration",
    ],
    "Remastered": ["Remastered", "Remaster", "HD Remaster"],
    "Colorized": ["Colorized Edition", "Colourized Edition", "Colorized", "Colourized"],
    "Unrated": ["Unrated"],
    "Uncensored": ["Uncensored"],
    "Definitive Edition": ["Definitive Edition"],
    "Platinum Edition": ["Platinum Edition"],
    "Gold Edition": ["Gold Edition"],
    "Diamond Edition": ["Diamond Edition"],
    "Steelbook Edition": ["Steelbook Edition"],
    "Limited Edition": ["Limited Edition"],
    "Deluxe Edition": ["Deluxe Edition"],
    "Premium Edition": ["Premium Edition"],
    "Complete Edition": ["Complete Edition"],
    "AI Remaster": ["AI Remaster", "AI Remastered"],
    "Upscaled": [
        "AI Upscaled",
        "AI Enhanced",
        "AI Upscale",
        "Upscaled",
        "Upscale",
        "Upscaling",
    ],
    "Director's Definitive Cut": ["Director's Definitive Cut"],
    "Extended Director's Cut": ["Extended Director's Cut", "Ultimate Director's Cut"],
    "Original Cut": ["Original Cut"],
    "Cinematic Cut": ["Cinematic Cut"],
    "Roadshow Cut": ["Roadshow Cut"],
    "Premiere Cut": ["Premiere Cut"],
    "Festival Cut": ["Festival Cut"],
    "Workprint": ["Workprint"],
    "Rough Cut": ["Rough Cut"],
    "Special Assembly Cut": ["Special Assembly Cut"],
    "Amazon Edition": ["Amazon Edition", "Amazon", "Amazon Prime Edition", "Amazon Prime"],
    "Netflix Edition": ["Netflix Edition"],
    "HBO Edition": ["HBO Edition"],
    "VHS Source": ["VHSRecord", "VHS Record", "VHS Rip", "VHS", "VHS-Rip"],
}

View File

@@ -0,0 +1,74 @@
"""Frame class and resolution constants.
This module defines video resolution frame classes (480p, 720p, 1080p, 4K, 8K, etc.)
and their nominal heights and typical widths.
Also includes non-standard quality indicators that appear in filenames but don't
represent specific resolutions.
"""
# Non-standard quality indicators that don't have specific resolution values.
# These are used in filenames to indicate quality but aren't proper frame
# classes; when found, we return None instead of trying to classify them.
# Note: We have specific frame classes like "2160p" (4K) and "4320p" (8K),
# but when files use just "4K" or "8K" without the "p" suffix, we can't
# determine the exact resolution, so we treat them as non-standard indicators.
NON_STANDARD_QUALITY_INDICATORS = ['SD', 'LQ', 'HD', 'QHD', 'FHD', 'FullHD', '4K', '8K']
# Resolution frame classes: each entry gives the nominal height, the widths
# commonly paired with it, and a human-readable description. 'i' suffixes
# denote interlaced variants of the same nominal height.
FRAME_CLASSES = {
    "480p": {
        "nominal_height": 480,
        "typical_widths": [640, 704, 720],
        "description": "Standard Definition (SD) - DVD quality",
    },
    "480i": {
        "nominal_height": 480,
        "typical_widths": [640, 704, 720],
        "description": "Standard Definition (SD) interlaced - NTSC quality",
    },
    "360p": {
        "nominal_height": 360,
        "typical_widths": [480, 640],
        "description": "Low Definition (LD) - 360p",
    },
    "576p": {
        "nominal_height": 576,
        "typical_widths": [720, 768],
        "description": "PAL Standard Definition (SD) - European DVD quality",
    },
    "576i": {
        "nominal_height": 576,
        "typical_widths": [720, 768],
        "description": "PAL Standard Definition (SD) interlaced - European quality",
    },
    "720p": {
        "nominal_height": 720,
        "typical_widths": [1280],
        "description": "High Definition (HD) - 720p HD",
    },
    "1080p": {
        "nominal_height": 1080,
        "typical_widths": [1920],
        "description": "Full High Definition (FHD) - 1080p HD",
    },
    "1080i": {
        "nominal_height": 1080,
        "typical_widths": [1920],
        "description": "Full High Definition (FHD) interlaced - 1080i HD",
    },
    "1440p": {
        "nominal_height": 1440,
        "typical_widths": [2560],
        "description": "Quad High Definition (QHD) - 1440p 2K",
    },
    "2160p": {
        "nominal_height": 2160,
        "typical_widths": [3840],
        "description": "Ultra High Definition (UHD) - 2160p 4K",
    },
    "4320p": {
        "nominal_height": 4320,
        "typical_widths": [7680],
        "description": "Ultra High Definition (UHD) - 4320p 8K",
    },
}

View File

@@ -0,0 +1,31 @@
"""Language-related constants for filename parsing.
This module contains sets of words and patterns used to identify and skip
non-language codes when extracting language information from filenames.
"""
# Tokens that must never be interpreted as language codes while parsing
# filenames.  Each category is written as its own set literal and the union
# forms the single lookup set used by the parser.
SKIP_WORDS = (
    # Short everyday English words that collide with 2-3 letter language codes
    {'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had',
     'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his',
     'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two', 'way', 'who',
     'boy', 'did', 'let', 'put', 'say', 'she', 'too', 'use'}
    # Video container file extensions
    | {'avi', 'mkv', 'mp4', 'mpg', 'mov', 'wmv', 'flv', 'webm', 'm4v', 'm2ts',
       'ts', 'vob', 'iso', 'img'}
    # Quality / resolution markers
    | {'sd', 'hd', 'lq', 'qhd', 'uhd', 'p', 'i', 'hdr', 'sdr', '4k', '8k',
       '2160p', '1080p', '720p', '480p', '360p', '240p', '144p'}
    # Release source / video codec markers
    | {'web', 'dl', 'rip', 'bluray', 'dvd', 'hdtv', 'bdrip', 'dvdrip', 'xvid',
       'divx', 'h264', 'h265', 'x264', 'x265', 'hevc', 'avc'}
    # Audio codec / format markers
    | {'ma', 'atmos', 'dts', 'aac', 'ac3', 'mp3', 'flac', 'wav', 'wma', 'ogg',
       'opus'}
)

View File

@@ -0,0 +1,124 @@
"""Media type constants for supported video formats.
This module defines all supported video container formats and their metadata.
Each entry includes the MediaInfo format name for proper detection.
"""
MEDIA_TYPES = {
"mkv": {
"description": "Matroska multimedia container",
"meta_type": "Matroska",
"mime": "video/x-matroska",
"mediainfo_format": "Matroska",
},
"mk3d": {
"description": "Matroska 3D multimedia container",
"meta_type": "Matroska",
"mime": "video/x-matroska",
"mediainfo_format": "Matroska",
},
"avi": {
"description": "Audio Video Interleave",
"meta_type": "AVI",
"mime": "video/x-msvideo",
"mediainfo_format": "AVI",
},
"mov": {
"description": "QuickTime movie",
"meta_type": "QuickTime",
"mime": "video/quicktime",
"mediainfo_format": "QuickTime",
},
"mp4": {
"description": "MPEG-4 video container",
"meta_type": "MP4",
"mime": "video/mp4",
"mediainfo_format": "MPEG-4",
},
"wmv": {
"description": "Windows Media Video",
"meta_type": "ASF",
"mime": "video/x-ms-wmv",
"mediainfo_format": "Windows Media",
},
"flv": {
"description": "Flash Video",
"meta_type": "FLV",
"mime": "video/x-flv",
"mediainfo_format": "Flash Video",
},
"webm": {
"description": "WebM multimedia",
"meta_type": "WebM",
"mime": "video/webm",
"mediainfo_format": "WebM",
},
"m4v": {
"description": "MPEG-4 video",
"meta_type": "MP4",
"mime": "video/mp4",
"mediainfo_format": "MPEG-4",
},
"3gp": {
"description": "3GPP multimedia",
"meta_type": "MP4",
"mime": "video/3gpp",
"mediainfo_format": "MPEG-4",
},
"ogv": {
"description": "Ogg Video",
"meta_type": "Ogg",
"mime": "video/ogg",
"mediainfo_format": "Ogg",
},
"mpg": {
"description": "MPEG video",
"meta_type": "MPEG-PS",
"mime": "video/mpeg",
"mediainfo_format": "MPEG-PS",
},
"mpeg": {
"description": "MPEG video",
"meta_type": "MPEG-PS",
"mime": "video/mpeg",
"mediainfo_format": "MPEG-PS",
},
}
# Reverse mapping: meta_type -> list of extensions
# Built once at module load instead of rebuilding in every extractor instance
META_TYPE_TO_EXTENSIONS = {}
for ext, info in MEDIA_TYPES.items():
meta_type = info.get('meta_type')
if meta_type:
if meta_type not in META_TYPE_TO_EXTENSIONS:
META_TYPE_TO_EXTENSIONS[meta_type] = []
META_TYPE_TO_EXTENSIONS[meta_type].append(ext)
# Reverse mapping: MediaInfo format name -> extension
# Built from MEDIA_TYPES at module load
MEDIAINFO_FORMAT_TO_EXTENSION = {}
for ext, info in MEDIA_TYPES.items():
mediainfo_format = info.get('mediainfo_format')
if mediainfo_format:
# Store only the first (primary) extension for each format
if mediainfo_format not in MEDIAINFO_FORMAT_TO_EXTENSION:
MEDIAINFO_FORMAT_TO_EXTENSION[mediainfo_format] = ext
def get_extension_from_format(format_name: str) -> str | None:
"""Get file extension from MediaInfo format name.
Args:
format_name: Format name as reported by MediaInfo (e.g., "MPEG-4", "Matroska")
Returns:
File extension (e.g., "mp4", "mkv") or None if format is unknown
Example:
>>> get_extension_from_format("MPEG-4")
'mp4'
>>> get_extension_from_format("Matroska")
'mkv'
"""
return MEDIAINFO_FORMAT_TO_EXTENSION.get(format_name)

View File

@@ -0,0 +1,32 @@
"""Movie database identifier constants.
This module defines movie and TV database services (TMDB, IMDB, Trakt, TVDB)
and their identifier patterns.
"""
def _db_entry(name: str, description: str, url: str, prefix: str) -> dict:
    """Build one database record; filename detection patterns derive from the prefix."""
    return {
        "name": name,
        "description": description,
        "url": url,
        "patterns": [f"{prefix}id", prefix, f"{prefix}id-", f"{prefix}-"],
    }


# Supported movie/TV databases keyed by canonical short id.
MOVIE_DB_DICT = {
    "tmdb": _db_entry("The Movie Database (TMDb)",
                      "Community built movie and TV database",
                      "https://www.themoviedb.org/", "tmdb"),
    "imdb": _db_entry("Internet Movie Database (IMDb)",
                      "Comprehensive movie, TV, and celebrity database",
                      "https://www.imdb.com/", "imdb"),
    "trakt": _db_entry("Trakt.tv",
                       "Service that integrates with media centers for scrobbling",
                       "https://trakt.tv/", "trakt"),
    "tvdb": _db_entry("The TV Database (TVDB)",
                      "Community driven TV database",
                      "https://thetvdb.com/", "tvdb"),
}

View File

@@ -0,0 +1,23 @@
"""Video source type constants.
This module defines video source types (WEB-DL, BDRip, etc.) and their aliases.
"""
# Canonical source label -> accepted spelling variants seen in filenames.
SOURCE_DICT = {
    "WEB-DL":    ["WEB-DL", "WEBRip", "WEB-Rip", "WEB", "WEB-DLRip"],
    "BDRip":     ["BDRip", "BD-Rip", "BDRIP"],
    "BDRemux":   ["BDRemux", "BD-Remux", "BDREMUX", "REMUX"],
    "DVDRip":    ["DVDRip", "DVD-Rip", "DVDRIP"],
    "HDTVRip":   ["HDTVRip", "HDTV"],
    "BluRay":    ["BluRay", "BLURAY", "Blu-ray"],
    "SATRip":    ["SATRip", "SAT-Rip", "SATRIP"],
    "VHSRecord": ["VHSRecord", "VHS Record", "VHS-Rip", "VHSRip", "VHS",
                  "VHS Tape", "VHS-Tape"],
}

View File

@@ -0,0 +1,20 @@
"""Year validation constants for filename parsing.
This module contains constants used for validating years extracted from filenames.
"""
import datetime
# Year-of-release sanity bounds for filename parsing.
CURRENT_YEAR = datetime.datetime.now().year  # evaluated once at import time
MIN_VALID_YEAR = 1900                        # start of the cinema era
YEAR_FUTURE_BUFFER = 10                      # tolerate upcoming releases


def is_valid_year(year: int) -> bool:
    """Return True when *year* lies in [MIN_VALID_YEAR, CURRENT_YEAR + YEAR_FUTURE_BUFFER]."""
    upper_bound = CURRENT_YEAR + YEAR_FUTURE_BUFFER
    return MIN_VALID_YEAR <= year <= upper_bound

View File

@@ -1,4 +0,0 @@
# Decorators package
# Re-exports the caching decorator as this package's sole public API.
from .caching import cached_method
__all__ = ['cached_method']

View File

@@ -1,57 +0,0 @@
"""Caching decorators for extractors."""
import hashlib
import json
from pathlib import Path
from typing import Any, Callable, Optional
from renamer.cache import Cache
# Global cache instance shared by every @cached_method-decorated method
_cache = Cache()


def cached_method(ttl_seconds: int = 3600) -> Callable:
    """Decorator factory that caches method results in the global file cache.

    The cache key combines the class name, method name, an instance
    identifier (``self.file_path`` when present, otherwise ``id(self)``)
    and, when the call has arguments, an MD5 hash of their JSON form.

    Args:
        ttl_seconds: Time to live for cached results in seconds (default 1 hour)

    Returns:
        The decorated method with caching

    Note:
        A cached value of ``None`` is indistinguishable from a cache miss,
        so methods that return ``None`` are re-executed (and re-stored) on
        every call.
    """
    # Local import so the module's dependency list stays unchanged.
    from functools import wraps

    def decorator(func: Callable) -> Callable:
        # wraps() preserves __name__/__doc__/__module__ of the wrapped method;
        # without it every cached method reported itself as "wrapper".
        @wraps(func)
        def wrapper(self, *args, **kwargs) -> Any:
            # Cache key layout: class_name.method_name.instance_id[.param_hash]
            class_name = self.__class__.__name__
            method_name = func.__name__
            # Use instance identifier (file_path for extractors)
            instance_id = getattr(self, 'file_path', str(id(self)))
            # Hash path-like ids so separators cannot create cache subdirectories
            if '/' in str(instance_id) or '\\' in str(instance_id):
                instance_id = hashlib.md5(str(instance_id).encode('utf-8')).hexdigest()
            # Hash args/kwargs only when present (self excluded)
            if args or kwargs:
                param_str = json.dumps((args, kwargs), sort_keys=True, default=str)
                param_hash = hashlib.md5(param_str.encode('utf-8')).hexdigest()
                cache_key = f"{class_name}.{method_name}.{instance_id}.{param_hash}"
            else:
                cache_key = f"{class_name}.{method_name}.{instance_id}"
            # Serve from cache when a stored value exists
            cached_result = _cache.get_object(cache_key)
            if cached_result is not None:
                return cached_result
            # Miss: compute, store with TTL, return
            result = func(self, *args, **kwargs)
            _cache.set_object(cache_key, result, ttl_seconds)
            return result
        return wrapper
    return decorator

View File

@@ -0,0 +1,25 @@
"""Extractors package - provides metadata extraction from media files.
This package contains various extractor classes that extract metadata from
different sources (filename, MediaInfo, file system, TMDB API, etc.).
All extractors should implement the DataExtractor protocol defined in base.py.
"""
from .base import DataExtractor
from .default_extractor import DefaultExtractor
from .filename_extractor import FilenameExtractor
from .fileinfo_extractor import FileInfoExtractor
from .mediainfo_extractor import MediaInfoExtractor
from .metadata_extractor import MetadataExtractor
from .tmdb_extractor import TMDBExtractor
# Public API of the package; keep in sync with the imports above.
__all__ = [
    'DataExtractor',
    'DefaultExtractor',
    'FilenameExtractor',
    'FileInfoExtractor',
    'MediaInfoExtractor',
    'MetadataExtractor',
    'TMDBExtractor',
]

218
renamer/extractors/base.py Normal file
View File

@@ -0,0 +1,218 @@
"""Base classes and protocols for extractors.
This module defines the DataExtractor Protocol that all extractors should implement.
The protocol ensures a consistent interface across all extractor types.
"""
from pathlib import Path
from typing import Protocol, Optional
class DataExtractor(Protocol):
    """Structural typing protocol shared by every metadata extractor.

    Any class that exposes these methods (plus a ``file_path`` attribute)
    satisfies the protocol through duck typing -- no inheritance required.
    Each ``extract_*`` method returns the requested piece of metadata, or
    ``None`` (an empty list for the track methods) when the information is
    not available from that source.

    Attributes:
        file_path: Path to the file being analyzed

    Example:
        class MyExtractor:
            def __init__(self, file_path: Path):
                self.file_path = file_path

            def extract_title(self) -> Optional[str]:
                return "Movie Title"
    """

    file_path: Path

    def extract_title(self) -> Optional[str]:
        """Return the media title, or None when unavailable."""
        ...

    def extract_year(self) -> Optional[str]:
        """Return the release year as a string (e.g. "2024"), or None."""
        ...

    def extract_source(self) -> Optional[str]:
        """Return the source/release type (e.g. BluRay, WEB-DL, HDTV), or None."""
        ...

    def extract_order(self) -> Optional[str]:
        """Return ordering info (e.g. episode or disc number), or None."""
        ...

    def extract_resolution(self) -> Optional[str]:
        """Return the video resolution (e.g. 1080p, 2160p, 720p), or None."""
        ...

    def extract_hdr(self) -> Optional[str]:
        """Return the HDR format (e.g. HDR10, Dolby Vision), or None."""
        ...

    def extract_movie_db(self) -> Optional[str]:
        """Return movie database identifiers (e.g. TMDB, IMDB), or None."""
        ...

    def extract_special_info(self) -> Optional[str]:
        """Return special release info (e.g. REPACK, PROPER, Director's Cut), or None."""
        ...

    def extract_audio_langs(self) -> Optional[str]:
        """Return comma-separated audio language codes, or None."""
        ...

    def extract_meta_type(self) -> Optional[str]:
        """Return metadata type/format information, or None."""
        ...

    def extract_size(self) -> Optional[int]:
        """Return the file size in bytes, or None."""
        ...

    def extract_modification_time(self) -> Optional[float]:
        """Return the Unix timestamp of last modification, or None."""
        ...

    def extract_file_name(self) -> Optional[str]:
        """Return the file name without its directory path, or None."""
        ...

    def extract_file_path(self) -> Optional[str]:
        """Return the full file path as a string, or None."""
        ...

    def extract_frame_class(self) -> Optional[str]:
        """Return the frame/aspect-ratio classification, or None."""
        ...

    def extract_video_tracks(self) -> list[dict]:
        """Return video track metadata dicts; empty list when none exist."""
        ...

    def extract_audio_tracks(self) -> list[dict]:
        """Return audio track metadata dicts; empty list when none exist."""
        ...

    def extract_subtitle_tracks(self) -> list[dict]:
        """Return subtitle track metadata dicts; empty list when none exist."""
        ...

    def extract_anamorphic(self) -> Optional[str]:
        """Return anamorphic encoding status, or None."""
        ...

    def extract_extension(self) -> Optional[str]:
        """Return the file extension without the dot, or None."""
        ...

    def extract_tmdb_url(self) -> Optional[str]:
        """Return the full TMDB URL, or None."""
        ...

    def extract_tmdb_id(self) -> Optional[str]:
        """Return the TMDB id as a string, or None."""
        ...

    def extract_original_title(self) -> Optional[str]:
        """Return the original (non-localized) title, or None."""
        ...

View File

@@ -1,71 +1,117 @@
class DefaultExtractor: """Default extractor providing fallback values.
"""Extractor that provides default fallback values"""
def extract_title(self): This module provides a minimal implementation of the DataExtractor protocol
that returns default/empty values for all extraction methods. Used as a
fallback when no specific extractor is available.
"""
from typing import Optional
class DefaultExtractor:
"""Extractor that provides default fallback values for all extraction methods.
This class implements the DataExtractor protocol by returning sensible
defaults (None, empty strings, empty lists) for all extraction operations.
It's used as a final fallback in the extractor chain when no other
extractor can provide data.
All methods return None or empty values, making it safe to use when
no actual data extraction is possible.
"""
def extract_title(self) -> Optional[str]:
"""Return default title.
Returns:
Default title string "Unknown Title"
"""
return "Unknown Title" return "Unknown Title"
def extract_year(self): def extract_year(self) -> Optional[str]:
"""Return year. Returns None as no year information is available."""
return None return None
def extract_source(self): def extract_source(self) -> Optional[str]:
"""Return video source. Returns None as no source information is available."""
return None return None
def extract_order(self): def extract_order(self) -> Optional[str]:
"""Return sequence order. Returns None as no order information is available."""
return None return None
def extract_resolution(self): def extract_resolution(self) -> Optional[str]:
"""Return resolution. Returns None as no resolution information is available."""
return None return None
def extract_hdr(self): def extract_hdr(self) -> Optional[str]:
"""Return HDR information. Returns None as no HDR information is available."""
return None return None
def extract_movie_db(self): def extract_movie_db(self) -> list[str] | None:
"""Return movie database ID. Returns None as no database information is available."""
return None return None
def extract_special_info(self): def extract_special_info(self) -> Optional[str]:
"""Return special edition info. Returns None as no special info is available."""
return None return None
def extract_audio_langs(self): def extract_audio_langs(self) -> Optional[str]:
"""Return audio languages. Returns None as no language information is available."""
return None return None
def extract_meta_type(self): def extract_meta_type(self) -> Optional[str]:
"""Return metadata type. Returns None as no type information is available."""
return None return None
def extract_size(self): def extract_size(self) -> Optional[int]:
"""Return file size. Returns None as no size information is available."""
return None return None
def extract_modification_time(self): def extract_modification_time(self) -> Optional[float]:
"""Return modification time. Returns None as no timestamp is available."""
return None return None
def extract_file_name(self): def extract_file_name(self) -> Optional[str]:
"""Return file name. Returns None as no filename is available."""
return None return None
def extract_file_path(self): def extract_file_path(self) -> Optional[str]:
"""Return file path. Returns None as no file path is available."""
return None return None
def extract_frame_class(self): def extract_frame_class(self) -> Optional[str]:
"""Return frame class. Returns None as no frame class information is available."""
return None return None
def extract_video_tracks(self): def extract_video_tracks(self) -> list[dict]:
"""Return video tracks. Returns empty list as no video tracks are available."""
return [] return []
def extract_audio_tracks(self): def extract_audio_tracks(self) -> list[dict]:
"""Return audio tracks. Returns empty list as no audio tracks are available."""
return [] return []
def extract_subtitle_tracks(self): def extract_subtitle_tracks(self) -> list[dict]:
"""Return subtitle tracks. Returns empty list as no subtitle tracks are available."""
return [] return []
def extract_anamorphic(self): def extract_anamorphic(self) -> Optional[str]:
"""Return anamorphic info. Returns None as no anamorphic information is available."""
return None return None
def extract_extension(self): def extract_extension(self) -> Optional[str]:
"""Return file extension. Returns 'ext' as default placeholder."""
return "ext"
def extract_tmdb_url(self) -> Optional[str]:
"""Return TMDB URL. Returns None as no TMDB URL is available."""
return None return None
def extract_tmdb_url(self): def extract_tmdb_id(self) -> Optional[str]:
"""Return TMDB ID. Returns None as no TMDB ID is available."""
return None return None
def extract_tmdb_id(self): def extract_original_title(self) -> Optional[str]:
return None """Return original title. Returns None as no original title is available."""
def extract_original_title(self):
return None return None

View File

@@ -1,3 +1,11 @@
"""Media metadata extraction coordinator.
This module provides the MediaExtractor class which coordinates multiple
specialized extractors to gather comprehensive metadata about media files.
It implements a priority-based extraction system where data is retrieved
from the most appropriate source.
"""
from pathlib import Path from pathlib import Path
from .filename_extractor import FilenameExtractor from .filename_extractor import FilenameExtractor
from .metadata_extractor import MetadataExtractor from .metadata_extractor import MetadataExtractor
@@ -8,16 +16,44 @@ from .default_extractor import DefaultExtractor
class MediaExtractor: class MediaExtractor:
"""Class to extract various metadata from media files using specialized extractors""" """Coordinator for extracting metadata from media files using multiple specialized extractors.
def __init__(self, file_path: Path): This class manages a collection of specialized extractors and provides a unified
interface for retrieving metadata. It implements a priority-based system where
each type of data is retrieved from the most appropriate source.
The extraction priority order varies by data type:
- Title: TMDB → Metadata → Filename → Default
- Year: Filename → Default
- Technical info: MediaInfo → Default
- File info: FileInfo → Default
Attributes:
file_path: Path to the media file
filename_extractor: Extracts metadata from filename patterns
metadata_extractor: Extracts embedded metadata tags
mediainfo_extractor: Extracts technical media information
fileinfo_extractor: Extracts basic file system information
tmdb_extractor: Fetches metadata from The Movie Database API
default_extractor: Provides fallback default values
Example:
>>> from pathlib import Path
>>> extractor = MediaExtractor(Path("Movie (2020) [1080p].mkv"))
>>> title = extractor.get("title")
>>> year = extractor.get("year")
>>> tracks = extractor.get("video_tracks")
"""
def __init__(self, file_path: Path, use_cache: bool = True):
self.file_path = file_path self.file_path = file_path
self.filename_extractor = FilenameExtractor(file_path) # Initialize all extractors - they use singleton Cache internally
self.metadata_extractor = MetadataExtractor(file_path) self.filename_extractor = FilenameExtractor(file_path, use_cache)
self.mediainfo_extractor = MediaInfoExtractor(file_path) self.metadata_extractor = MetadataExtractor(file_path, use_cache)
self.fileinfo_extractor = FileInfoExtractor(file_path) self.mediainfo_extractor = MediaInfoExtractor(file_path, use_cache)
self.tmdb_extractor = TMDBExtractor(file_path) self.fileinfo_extractor = FileInfoExtractor(file_path, use_cache)
self.tmdb_extractor = TMDBExtractor(file_path, use_cache)
self.default_extractor = DefaultExtractor() self.default_extractor = DefaultExtractor()
# Extractor mapping # Extractor mapping
@@ -165,10 +201,38 @@ class MediaExtractor:
("Default", "extract_subtitle_tracks"), ("Default", "extract_subtitle_tracks"),
], ],
}, },
"genres": {
"sources": [
("TMDB", "extract_genres"),
("Default", "extract_genres"),
],
},
"production_countries": {
"sources": [
("TMDB", "extract_production_countries"),
],
},
} }
def get(self, key: str, source: str | None = None): def get(self, key: str, source: str | None = None):
"""Get extracted data by key, optionally from specific source""" """Get metadata value by key, optionally from a specific source.
Retrieves metadata using a priority-based system. If a source is specified,
only that extractor is used. Otherwise, extractors are tried in priority
order until a non-None value is found.
Args:
key: The metadata key to retrieve (e.g., "title", "year", "resolution")
source: Optional specific extractor to use ("TMDB", "MediaInfo", "Filename", etc.)
Returns:
The extracted metadata value, or None if not found
Example:
>>> extractor = MediaExtractor(Path("movie.mkv"))
>>> title = extractor.get("title") # Try all sources in priority order
>>> year = extractor.get("year", source="Filename") # Use only filename
"""
if source: if source:
# Specific source requested - find the extractor and call the method directly # Specific source requested - find the extractor and call the method directly
for extractor_name, extractor in self._extractors.items(): for extractor_name, extractor in self._extractors.items():

View File

@@ -1,49 +1,94 @@
"""File system information extractor.
This module provides the FileInfoExtractor class for extracting basic
file system metadata such as size, timestamps, paths, and extensions.
"""
from pathlib import Path from pathlib import Path
import logging import logging
import os from ..cache import cached_method, Cache
from ..decorators import cached_method from ..logging_config import LoggerConfig # Initialize logging singleton
# Set up logging conditionally
if os.getenv('FORMATTER_LOG', '0') == '1':
logging.basicConfig(filename='formatter.log', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
else:
logging.basicConfig(level=logging.CRITICAL) # Disable logging
class FileInfoExtractor: class FileInfoExtractor:
"""Class to extract file information""" """Extractor for basic file system information.
def __init__(self, file_path: Path): This class extracts file system metadata including size, modification time,
self.file_path = file_path file name, path, and extension. All extraction methods are cached for
self._size = file_path.stat().st_size performance.
self._modification_time = file_path.stat().st_mtime
self._file_name = file_path.name Attributes:
self._file_path = str(file_path) file_path: Path object pointing to the file
self._cache = {} # Internal cache for method results _size: Cached file size in bytes
logging.info(f"FileInfoExtractor: file_name={self._file_name!r}, file_path={self._file_path!r}") _modification_time: Cached modification timestamp
_file_name: Cached file name
_file_path: Cached full file path as string
_cache: Internal cache for method results
Example:
>>> from pathlib import Path
>>> extractor = FileInfoExtractor(Path("movie.mkv"))
>>> size = extractor.extract_size() # Returns size in bytes
>>> name = extractor.extract_file_name() # Returns "movie.mkv"
"""
def __init__(self, file_path: Path, use_cache: bool = True):
"""Initialize the FileInfoExtractor.
Args:
file_path: Path object pointing to the file to extract info from
use_cache: Whether to use caching (default: True)
"""
self._file_path = file_path
self.file_path = file_path # Expose for cache key generation
self.cache = Cache() if use_cache else None # Singleton cache for @cached_method decorator
self.settings = None # Will be set by Settings singleton if needed
self._stat = file_path.stat()
self._cache: dict[str, any] = {} # Internal cache for method results
@cached_method() @cached_method()
def extract_size(self) -> int: def extract_size(self) -> int:
"""Extract file size in bytes""" """Extract file size in bytes.
return self._size
Returns:
File size in bytes as an integer
"""
return self._stat.st_size
@cached_method() @cached_method()
def extract_modification_time(self) -> float: def extract_modification_time(self) -> float:
"""Extract file modification time""" """Extract file modification time.
return self._modification_time
Returns:
Unix timestamp (seconds since epoch) as a float
"""
return self._stat.st_mtime
@cached_method() @cached_method()
def extract_file_name(self) -> str: def extract_file_name(self) -> str:
"""Extract file name""" """Extract file name (basename).
return self._file_name
Returns:
File name including extension (e.g., "movie.mkv")
"""
return self._file_path.name
@cached_method() @cached_method()
def extract_file_path(self) -> str: def extract_file_path(self) -> str:
"""Extract full file path as string""" """Extract full file path as string.
return self._file_path
Returns:
Absolute file path as a string
"""
return str(self._file_path)
@cached_method() @cached_method()
def extract_extension(self) -> str: def extract_extension(self) -> str | None:
"""Extract file extension without the dot""" """Extract file extension without the dot.
return self.file_path.suffix.lower().lstrip('.')
Returns:
File extension in lowercase without leading dot (e.g., "mkv", "mp4"),
or None if no extension exists
"""
ext = self._file_path.suffix.lower().lstrip('.')
return ext if ext else None

View File

@@ -1,15 +1,24 @@
import re import re
import logging
from pathlib import Path from pathlib import Path
from collections import Counter from collections import Counter
from ..constants import SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT, SPECIAL_EDITIONS from ..constants import (
from ..decorators import cached_method SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT, SPECIAL_EDITIONS, SKIP_WORDS,
NON_STANDARD_QUALITY_INDICATORS,
is_valid_year,
CYRILLIC_TO_ENGLISH
)
from ..cache import cached_method, Cache
from ..utils.pattern_utils import PatternExtractor
import langcodes import langcodes
logger = logging.getLogger(__name__)
class FilenameExtractor: class FilenameExtractor:
"""Class to extract information from filename""" """Class to extract information from filename"""
def __init__(self, file_path: Path | str): def __init__(self, file_path: Path | str, use_cache: bool = True):
if isinstance(file_path, str): if isinstance(file_path, str):
self.file_path = Path(file_path) self.file_path = Path(file_path)
self.file_name = file_path self.file_name = file_path
@@ -17,14 +26,15 @@ class FilenameExtractor:
self.file_path = file_path self.file_path = file_path
self.file_name = file_path.name self.file_name = file_path.name
self.cache = Cache() if use_cache else None # Singleton cache for @cached_method decorator
self.settings = None # Will be set by Settings singleton if needed
# Initialize utility helper
self._pattern_extractor = PatternExtractor()
def _normalize_cyrillic(self, text: str) -> str: def _normalize_cyrillic(self, text: str) -> str:
"""Normalize Cyrillic characters to English equivalents for parsing""" """Normalize Cyrillic characters to English equivalents for parsing"""
replacements = { for cyr, eng in CYRILLIC_TO_ENGLISH.items():
'р': 'p',
'і': 'i',
# Add more as needed
}
for cyr, eng in replacements.items():
text = text.replace(cyr, eng) text = text.replace(cyr, eng)
return text return text
@@ -57,10 +67,9 @@ class FilenameExtractor:
# Last resort: any 4-digit number # Last resort: any 4-digit number
any_match = re.search(r'\b(\d{4})\b', self.file_name) any_match = re.search(r'\b(\d{4})\b', self.file_name)
if any_match: if any_match:
year = any_match.group(1) year = int(any_match.group(1))
# Basic sanity check # Basic sanity check using constants
current_year = 2025 if is_valid_year(year):
if 1900 <= int(year) <= current_year + 10:
year_pos = any_match.start() # Cut before the year for plain years year_pos = any_match.start() # Cut before the year for plain years
# Find source position # Find source position
@@ -119,7 +128,14 @@ class FilenameExtractor:
# Clean up title: remove leading/trailing brackets and dots # Clean up title: remove leading/trailing brackets and dots
title = title.strip('[](). ') title = title.strip('[](). ')
# Replace dots with spaces if they appear to be word separators
# Only replace dots that are surrounded by letters/digits (not at edges)
title = re.sub(r'(?<=[a-zA-Z0-9À-ÿ])\.(?=[a-zA-Z0-9À-ÿ])', ' ', title)
# Clean up multiple spaces
title = re.sub(r'\s+', ' ', title).strip()
return title if title else None return title if title else None
@cached_method() @cached_method()
@@ -138,12 +154,11 @@ class FilenameExtractor:
# Last resort: any 4-digit number (but this is less reliable) # Last resort: any 4-digit number (but this is less reliable)
any_match = re.search(r'\b(\d{4})\b', self.file_name) any_match = re.search(r'\b(\d{4})\b', self.file_name)
if any_match: if any_match:
year = any_match.group(1) year = int(any_match.group(1))
# Basic sanity check: years should be between 1900 and current year + a few years # Basic sanity check using constants
current_year = 2025 # Update this as needed if is_valid_year(year):
if 1900 <= int(year) <= current_year + 10:
year_pos = any_match.start() year_pos = any_match.start()
return year return str(year)
return None return None
@@ -198,9 +213,8 @@ class FilenameExtractor:
# Fallback to height-based if not in constants # Fallback to height-based if not in constants
return self._get_frame_class_from_height(height) return self._get_frame_class_from_height(height)
# If no specific resolution found, check for quality indicators # If no specific resolution found, check for non-standard quality indicators
unclassified_indicators = ['SD', 'LQ', 'HD', 'QHD'] for indicator in NON_STANDARD_QUALITY_INDICATORS:
for indicator in unclassified_indicators:
if re.search(r'\b' + re.escape(indicator) + r'\b', self.file_name, re.IGNORECASE): if re.search(r'\b' + re.escape(indicator) + r'\b', self.file_name, re.IGNORECASE):
return None return None
@@ -222,23 +236,10 @@ class FilenameExtractor:
@cached_method() @cached_method()
def extract_movie_db(self) -> list[str] | None: def extract_movie_db(self) -> list[str] | None:
"""Extract movie database identifier from filename""" """Extract movie database identifier from filename"""
# Look for patterns at the end of filename in brackets or braces # Use PatternExtractor utility to avoid code duplication
# Patterns: [tmdbid-123] {imdb-tt123} [imdbid-tt123] etc. db_info = self._pattern_extractor.extract_movie_db_ids(self.file_name)
if db_info:
# Match patterns like [tmdbid-123456] or {imdb-tt1234567} return [db_info['type'], db_info['id']]
pattern = r'[\[\{]([a-zA-Z]+(?:id)?)[-\s]*([a-zA-Z0-9]+)[\]\}]'
matches = re.findall(pattern, self.file_name)
if matches:
# Take the last match (closest to end of filename)
db_type, db_id = matches[-1]
# Normalize database type
db_type_lower = db_type.lower()
for db_key, db_info in MOVIE_DB_DICT.items():
if any(db_type_lower.startswith(pattern.rstrip('-')) for pattern in db_info['patterns']):
return [db_key, db_id]
return None return None
@cached_method() @cached_method()
@@ -309,8 +310,8 @@ class FilenameExtractor:
count = int(lang_match.group(1)) if lang_match.group(1) else 1 count = int(lang_match.group(1)) if lang_match.group(1) else 1
lang_code = lang_match.group(2) lang_code = lang_match.group(2)
# Skip if it's a quality/resolution indicator # Skip if it's a quality/resolution indicator or other skip word
if lang_code in ['sd', 'hd', 'lq', 'qhd', 'uhd', 'p', 'i', 'hdr', 'sdr']: if lang_code in SKIP_WORDS:
continue continue
# Skip if the language code is not at the end or if there are extra letters after # Skip if the language code is not at the end or if there are extra letters after
@@ -324,66 +325,46 @@ class FilenameExtractor:
lang_obj = langcodes.Language.get(lang_code) lang_obj = langcodes.Language.get(lang_code)
iso3_code = lang_obj.to_alpha3() iso3_code = lang_obj.to_alpha3()
langs.extend([iso3_code] * count) langs.extend([iso3_code] * count)
except: except (LookupError, ValueError, AttributeError) as e:
# Skip invalid language codes # Skip invalid language codes
logger.debug(f"Invalid language code '{lang_code}': {e}")
pass pass
# Second, look for standalone language codes outside brackets # Second, look for standalone language codes outside brackets
# Remove bracketed content first # Remove bracketed content first
text_without_brackets = re.sub(r'\[([^\]]+)\]', '', self.file_name) text_without_brackets = re.sub(r'\[([^\]]+)\]', '', self.file_name)
# Known language codes (2-3 letter ISO 639-1 or 639-3)
known_language_codes = {
'eng', 'ukr', 'rus', 'fra', 'deu', 'spa', 'ita', 'por', 'nor', 'swe', 'dan', 'fin', 'pol', 'cze', 'hun', 'tur', 'ara', 'heb', 'hin', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und',
'dut', 'nld', 'bel', 'bul', 'hrv', 'ces', 'dan', 'nld', 'est', 'fin', 'fra', 'deu', 'ell', 'heb', 'hin', 'hrv', 'hun', 'ind', 'ita', 'jpn', 'kor', 'lav', 'lit', 'mkd', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'und', 'zho',
'arb', 'ben', 'hin', 'mar', 'tam', 'tel', 'urd', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'nep', 'sin', 'san', 'tib', 'mon', 'kaz', 'uzb', 'kir', 'tuk', 'aze', 'kat', 'hye', 'geo', 'ell', 'sqi', 'bos', 'hrv', 'srp', 'slv', 'mkd', 'bul', 'alb', 'ron', 'mol', 'hun',
'fin', 'swe', 'nor', 'dan', 'isl', 'fao', 'est', 'lav', 'lit', 'bel', 'ukr', 'rus', 'pol', 'cze', 'slk', 'slv', 'hrv', 'bos', 'srp', 'mkd', 'bul', 'ell', 'alb', 'ron', 'hun', 'tur', 'aze', 'geo', 'arm', 'kat', 'hye', 'per', 'kur', 'pus', 'urd', 'ara', 'heb', 'san', 'hin', 'ben', 'tam', 'tel', 'mar', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'awa', 'mag', 'nep', 'sin', 'div', 'tib', 'mon', 'kaz', 'kir', 'tuk', 'uzb', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und', 'lao', 'khm', 'mya', 'vie', 'und', 'ind', 'msa', 'zho', 'yue', 'wuu', 'nan', 'hak', 'gan', 'hsn',
'spa', 'por', 'fra', 'ita', 'deu', 'nld', 'dut', 'swe', 'nor', 'dan', 'fin', 'est', 'lav', 'lit', 'pol', 'cze', 'slk', 'slv', 'hrv', 'bos', 'srp', 'mkd', 'bul', 'ell', 'alb', 'ron', 'hun', 'tur', 'aze', 'geo', 'arm', 'kat', 'hye', 'per', 'kur', 'pus', 'urd', 'ara', 'heb', 'san', 'hin', 'ben', 'tam', 'tel', 'mar', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'awa', 'mag', 'nep', 'sin', 'div', 'tib', 'mon', 'kaz', 'kir', 'tuk', 'uzb', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und', 'lao', 'khm', 'mya', 'vie', 'und', 'ind', 'msa', 'zho', 'yue', 'wuu', 'nan', 'hak', 'gan', 'hsn'
}
allowed_title_case = {'ukr', 'nor', 'eng', 'rus', 'fra', 'deu', 'spa', 'ita', 'por', 'swe', 'dan', 'fin', 'pol', 'cze', 'hun', 'tur', 'ara', 'heb', 'hin', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und'}
# Look for language codes in various formats:
# - Uppercase: ENG, UKR, NOR
# - Title case: Ukr, Nor, Eng
# - Lowercase: ukr, nor, eng
# - In dot-separated parts: .ukr. .eng.
# Split on dots, spaces, and underscores # Split on dots, spaces, and underscores
parts = re.split(r'[.\s_]+', text_without_brackets) parts = re.split(r'[.\s_]+', text_without_brackets)
for part in parts: for part in parts:
part = part.strip() part = part.strip()
if not part or len(part) < 2: if not part or len(part) < 2:
continue continue
part_lower = part.lower() part_lower = part.lower()
# Check if this part is a 2-3 letter language code # Check if this part is a 2-3 letter code
if re.match(r'^[a-zA-Z]{2,3}$', part): if not re.match(r'^[a-zA-Z]{2,3}$', part):
# Skip title case 2-letter words to avoid false positives like "In" -> "ind" continue
if part.istitle() and len(part) == 2:
continue # Skip title case 2-letter words to avoid false positives like "In" -> "ind"
if part.istitle() and part_lower not in allowed_title_case: if part.istitle() and len(part) == 2:
continue continue
skip_words = [
'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did', 'has', 'let', 'put', 'say', 'she', 'too', 'use', # Skip known non-language words
'avi', 'mkv', 'mp4', 'mpg', 'mov', 'wmv', 'flv', 'webm', 'm4v', 'm2ts', 'ts', 'vob', 'iso', 'img', if part_lower in SKIP_WORDS:
'sd', 'hd', 'lq', 'qhd', 'uhd', 'p', 'i', 'hdr', 'sdr', '4k', '8k', '2160p', '1080p', '720p', '480p', '360p', '240p', '144p', continue
'web', 'dl', 'rip', 'bluray', 'dvd', 'hdtv', 'bdrip', 'dvdrip', 'xvid', 'divx', 'h264', 'h265', 'x264', 'x265', 'hevc', 'avc',
'ma', 'atmos', 'dts', 'aac', 'ac3', 'mp3', 'flac', 'wav', 'wma', 'ogg', 'opus' # Try to validate with langcodes library
] try:
lang_obj = langcodes.Language.get(part_lower)
if part_lower not in skip_words and part_lower in known_language_codes: iso3_code = lang_obj.to_alpha3()
lang_code = part_lower langs.append(iso3_code)
except (LookupError, ValueError, AttributeError) as e:
# Convert to 3-letter ISO code # Not a valid language code, skip
try: logger.debug(f"Invalid language code '{part_lower}': {e}")
lang_obj = langcodes.Language.get(lang_code) pass
iso3_code = lang_obj.to_alpha3()
langs.append(iso3_code)
except:
# Skip invalid language codes
pass
if not langs: if not langs:
return '' return ''
@@ -399,39 +380,47 @@ class FilenameExtractor:
audio_langs = [f"{count}{lang}" if count > 1 else lang for lang, count in lang_counts.items()] audio_langs = [f"{count}{lang}" if count > 1 else lang for lang, count in lang_counts.items()]
return ','.join(audio_langs) return ','.join(audio_langs)
@cached_method()
def extract_extension(self) -> str | None:
"""Extract file extension from filename"""
# Use pathlib to extract extension properly
ext = self.file_path.suffix
# Remove leading dot and return
return ext[1:] if ext else None
@cached_method() @cached_method()
def extract_audio_tracks(self) -> list[dict]: def extract_audio_tracks(self) -> list[dict]:
"""Extract audio track data from filename (simplified version with only language)""" """Extract audio track data from filename (simplified version with only language)"""
# Similar to extract_audio_langs but returns list of dicts # Similar to extract_audio_langs but returns list of dicts
tracks = [] tracks = []
# First, look for languages inside brackets # First, look for languages inside brackets
bracket_pattern = r'\[([^\]]+)\]' bracket_pattern = r'\[([^\]]+)\]'
brackets = re.findall(bracket_pattern, self.file_name) brackets = re.findall(bracket_pattern, self.file_name)
for bracket in brackets: for bracket in brackets:
bracket_lower = bracket.lower() bracket_lower = bracket.lower()
# Skip brackets that contain movie database patterns # Skip brackets that contain movie database patterns
if any(db in bracket_lower for db in ['imdb', 'tmdb', 'tvdb']): if any(db in bracket_lower for db in ['imdb', 'tmdb', 'tvdb']):
continue continue
# Parse items separated by commas or underscores # Parse items separated by commas or underscores
items = re.split(r'[,_]', bracket) items = re.split(r'[,_]', bracket)
items = [item.strip() for item in items] items = [item.strip() for item in items]
for item in items: for item in items:
# Skip empty items or items that are clearly not languages # Skip empty items or items that are clearly not languages
if not item or len(item) < 2: if not item or len(item) < 2:
continue continue
item_lower = item.lower() item_lower = item.lower()
# Skip subtitle indicators # Skip subtitle indicators
if item_lower in ['sub', 'subs', 'subtitle']: if item_lower in ['sub', 'subs', 'subtitle']:
continue continue
# Check if item contains language codes (2-3 letter codes) # Check if item contains language codes (2-3 letter codes)
# Pattern: optional number + optional 'x' + language code # Pattern: optional number + optional 'x' + language code
# Allow the language code to be at the end of the item # Allow the language code to be at the end of the item
@@ -439,81 +428,61 @@ class FilenameExtractor:
if lang_match: if lang_match:
count = int(lang_match.group(1)) if lang_match.group(1) else 1 count = int(lang_match.group(1)) if lang_match.group(1) else 1
lang_code = lang_match.group(2) lang_code = lang_match.group(2)
# Skip if it's a quality/resolution indicator # Skip if it's a quality/resolution indicator or other skip word
if lang_code in ['sd', 'hd', 'lq', 'qhd', 'uhd', 'p', 'i', 'hdr', 'sdr']: if lang_code in SKIP_WORDS:
continue continue
# Skip if the language code is not at the end or if there are extra letters after # Skip if the language code is not at the end or if there are extra letters after
# But allow prefixes like numbers and 'x' # But allow prefixes like numbers and 'x'
prefix = item_lower[:-len(lang_code)] prefix = item_lower[:-len(lang_code)]
if not re.match(r'^(?:\d+x?)?$', prefix): if not re.match(r'^(?:\d+x?)?$', prefix):
continue continue
# Convert to 3-letter ISO code # Convert to 3-letter ISO code
try: try:
lang_obj = langcodes.Language.get(lang_code) lang_obj = langcodes.Language.get(lang_code)
iso3_code = lang_obj.to_alpha3() iso3_code = lang_obj.to_alpha3()
tracks.append({'language': iso3_code}) tracks.append({'language': iso3_code})
except: except (LookupError, ValueError, AttributeError) as e:
# Skip invalid language codes # Skip invalid language codes
logger.debug(f"Invalid language code '{lang_code}': {e}")
pass pass
# Second, look for standalone language codes outside brackets # Second, look for standalone language codes outside brackets
# Remove bracketed content first # Remove bracketed content first
text_without_brackets = re.sub(r'\[([^\]]+)\]', '', self.file_name) text_without_brackets = re.sub(r'\[([^\]]+)\]', '', self.file_name)
# Known language codes (2-3 letter ISO 639-1 or 639-3)
known_language_codes = {
'eng', 'ukr', 'rus', 'fra', 'deu', 'spa', 'ita', 'por', 'nor', 'swe', 'dan', 'fin', 'pol', 'cze', 'hun', 'tur', 'ara', 'heb', 'hin', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und',
'dut', 'nld', 'bel', 'bul', 'hrv', 'ces', 'dan', 'nld', 'est', 'fin', 'fra', 'deu', 'ell', 'heb', 'hin', 'hrv', 'hun', 'ind', 'ita', 'jpn', 'kor', 'lav', 'lit', 'mkd', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'und', 'zho',
'arb', 'ben', 'hin', 'mar', 'tam', 'tel', 'urd', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'nep', 'sin', 'san', 'tib', 'mon', 'kaz', 'uzb', 'kir', 'tuk', 'aze', 'kat', 'hye', 'geo', 'ell', 'sqi', 'bos', 'hrv', 'srp', 'slv', 'mkd', 'bul', 'alb', 'ron', 'mol', 'hun',
'fin', 'swe', 'nor', 'dan', 'isl', 'fao', 'est', 'lav', 'lit', 'bel', 'ukr', 'rus', 'pol', 'cze', 'slk', 'slv', 'hrv', 'bos', 'srp', 'mkd', 'bul', 'ell', 'alb', 'ron', 'hun', 'tur', 'aze', 'geo', 'arm', 'kat', 'hye', 'per', 'kur', 'pus', 'urd', 'ara', 'heb', 'san', 'hin', 'ben', 'tam', 'tel', 'mar', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'awa', 'mag', 'nep', 'sin', 'div', 'tib', 'mon', 'kaz', 'kir', 'tuk', 'uzb', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und', 'lao', 'khm', 'mya', 'vie', 'und', 'ind', 'msa', 'zho', 'yue', 'wuu', 'nan', 'hak', 'gan', 'hsn',
'spa', 'por', 'fra', 'ita', 'deu', 'nld', 'dut', 'swe', 'nor', 'dan', 'fin', 'est', 'lav', 'lit', 'pol', 'cze', 'slk', 'slv', 'hrv', 'bos', 'srp', 'mkd', 'bul', 'ell', 'alb', 'ron', 'hun', 'tur', 'aze', 'geo', 'arm', 'kat', 'hye', 'per', 'kur', 'pus', 'urd', 'ara', 'heb', 'san', 'hin', 'ben', 'tam', 'tel', 'mar', 'guj', 'kan', 'mal', 'ori', 'pan', 'asm', 'mai', 'bho', 'awa', 'mag', 'nep', 'sin', 'div', 'tib', 'mon', 'kaz', 'kir', 'tuk', 'uzb', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und', 'lao', 'khm', 'mya', 'vie', 'und', 'ind', 'msa', 'zho', 'yue', 'wuu', 'nan', 'hak', 'gan', 'hsn'
}
allowed_title_case = {'ukr', 'nor', 'eng', 'rus', 'fra', 'deu', 'spa', 'ita', 'por', 'swe', 'dan', 'fin', 'pol', 'cze', 'hun', 'tur', 'ara', 'heb', 'hin', 'jpn', 'kor', 'chi', 'tha', 'vie', 'und'}
# Look for language codes in various formats:
# - Uppercase: ENG, UKR, NOR
# - Title case: Ukr, Nor, Eng
# - Lowercase: ukr, nor, eng
# - In dot-separated parts: .ukr. .eng.
# Split on dots, spaces, and underscores # Split on dots, spaces, and underscores
parts = re.split(r'[.\s_]+', text_without_brackets) parts = re.split(r'[.\s_]+', text_without_brackets)
for part in parts: for part in parts:
part = part.strip() part = part.strip()
if not part or len(part) < 2: if not part or len(part) < 2:
continue continue
part_lower = part.lower() part_lower = part.lower()
# Check if this part is a 2-3 letter language code # Check if this part is a 2-3 letter code
if re.match(r'^[a-zA-Z]{2,3}$', part): if not re.match(r'^[a-zA-Z]{2,3}$', part):
# Skip title case 2-letter words to avoid false positives like "In" -> "ind" continue
if part.istitle() and len(part) == 2:
continue # Skip title case 2-letter words to avoid false positives like "In" -> "ind"
if part.istitle() and part_lower not in allowed_title_case: if part.istitle() and len(part) == 2:
continue continue
skip_words = [
'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did', 'has', 'let', 'put', 'say', 'she', 'too', 'use', # Skip known non-language words
'avi', 'mkv', 'mp4', 'mpg', 'mov', 'wmv', 'flv', 'webm', 'm4v', 'm2ts', 'ts', 'vob', 'iso', 'img', if part_lower in SKIP_WORDS:
'sd', 'hd', 'lq', 'qhd', 'uhd', 'p', 'i', 'hdr', 'sdr', '4k', '8k', '2160p', '1080p', '720p', '480p', '360p', '240p', '144p', continue
'web', 'dl', 'rip', 'bluray', 'dvd', 'hdtv', 'bdrip', 'dvdrip', 'xvid', 'divx', 'h264', 'h265', 'x264', 'x265', 'hevc', 'avc',
'ma', 'atmos', 'dts', 'aac', 'ac3', 'mp3', 'flac', 'wav', 'wma', 'ogg', 'opus' # Try to validate with langcodes library
] try:
lang_obj = langcodes.Language.get(part_lower)
if part_lower not in skip_words and part_lower in known_language_codes: iso3_code = lang_obj.to_alpha3()
lang_code = part_lower tracks.append({'language': iso3_code})
except (LookupError, ValueError, AttributeError) as e:
# Convert to 3-letter ISO code # Not a valid language code, skip
try: logger.debug(f"Invalid language code '{part_lower}': {e}")
lang_obj = langcodes.Language.get(lang_code) pass
iso3_code = lang_obj.to_alpha3()
tracks.append({'language': iso3_code})
except:
# Skip invalid language codes
pass
return tracks return tracks

View File

@@ -1,47 +1,46 @@
from pathlib import Path from pathlib import Path
from pymediainfo import MediaInfo from pymediainfo import MediaInfo
from collections import Counter from collections import Counter
from ..constants import FRAME_CLASSES, MEDIA_TYPES from ..constants import FRAME_CLASSES, get_extension_from_format
from ..decorators import cached_method from ..cache import cached_method, Cache
import langcodes import langcodes
import logging
logger = logging.getLogger(__name__)
class MediaInfoExtractor: class MediaInfoExtractor:
"""Class to extract information from MediaInfo""" """Class to extract information from MediaInfo"""
def __init__(self, file_path: Path): def __init__(self, file_path: Path, use_cache: bool = True):
self.file_path = file_path self.file_path = file_path
self.cache = Cache() if use_cache else None # Singleton cache for @cached_method decorator
self.settings = None # Will be set by Settings singleton if needed
self._cache = {} # Internal cache for method results self._cache = {} # Internal cache for method results
try:
self.media_info = MediaInfo.parse(file_path) # Parse media info - set to None on failure
self.media_info = MediaInfo.parse(file_path) if file_path.exists() else None
# Extract tracks
if self.media_info:
self.video_tracks = [t for t in self.media_info.tracks if t.track_type == 'Video'] self.video_tracks = [t for t in self.media_info.tracks if t.track_type == 'Video']
self.audio_tracks = [t for t in self.media_info.tracks if t.track_type == 'Audio'] self.audio_tracks = [t for t in self.media_info.tracks if t.track_type == 'Audio']
self.sub_tracks = [t for t in self.media_info.tracks if t.track_type == 'Text'] self.sub_tracks = [t for t in self.media_info.tracks if t.track_type == 'Text']
except Exception: else:
self.media_info = None
self.video_tracks = [] self.video_tracks = []
self.audio_tracks = [] self.audio_tracks = []
self.sub_tracks = [] self.sub_tracks = []
# Build mapping from meta_type to extensions
self._format_to_extensions = {}
for ext, info in MEDIA_TYPES.items():
meta_type = info.get('meta_type')
if meta_type:
if meta_type not in self._format_to_extensions:
self._format_to_extensions[meta_type] = []
self._format_to_extensions[meta_type].append(ext)
def _get_frame_class_from_height(self, height: int) -> str | None: def _get_frame_class_from_height(self, height: int) -> str | None:
"""Get frame class from video height, finding closest match if exact not found""" """Get frame class from video height, finding closest match if exact not found"""
if not height: if not height:
return None return None
# First try exact match # First try exact match
for frame_class, info in FRAME_CLASSES.items(): for frame_class, info in FRAME_CLASSES.items():
if height == info['nominal_height']: if height == info['nominal_height']:
return frame_class return frame_class
# If no exact match, find closest # If no exact match, find closest
closest = None closest = None
min_diff = float('inf') min_diff = float('inf')
@@ -50,7 +49,7 @@ class MediaInfoExtractor:
if diff < min_diff: if diff < min_diff:
min_diff = diff min_diff = diff
closest = frame_class closest = frame_class
# Only return if difference is reasonable (within 50 pixels) # Only return if difference is reasonable (within 50 pixels)
if min_diff <= 50: if min_diff <= 50:
return closest return closest
@@ -73,37 +72,60 @@ class MediaInfoExtractor:
width = getattr(self.video_tracks[0], 'width', None) width = getattr(self.video_tracks[0], 'width', None)
if not height or not width: if not height or not width:
return None return None
# Check if interlaced # Check if interlaced - try multiple attributes
# PyMediaInfo may use different attribute names depending on version
scan_type_attr = getattr(self.video_tracks[0], 'scan_type', None)
interlaced = getattr(self.video_tracks[0], 'interlaced', None) interlaced = getattr(self.video_tracks[0], 'interlaced', None)
scan_type = 'i' if interlaced == 'Yes' else 'p'
logger.debug(f"[{self.file_path.name}] Frame class detection - Resolution: {width}x{height}")
logger.debug(f"[{self.file_path.name}] scan_type attribute: {scan_type_attr!r} (type: {type(scan_type_attr).__name__})")
logger.debug(f"[{self.file_path.name}] interlaced attribute: {interlaced!r} (type: {type(interlaced).__name__})")
# Determine scan type from available attributes
# Check scan_type first (e.g., "Interlaced", "Progressive", "MBAFF")
if scan_type_attr and isinstance(scan_type_attr, str):
scan_type = 'i' if 'interlaced' in scan_type_attr.lower() else 'p'
logger.debug(f"[{self.file_path.name}] Using scan_type: {scan_type_attr!r} -> scan_type={scan_type!r}")
# Then check interlaced flag (e.g., "Yes", "No")
elif interlaced and isinstance(interlaced, str):
scan_type = 'i' if interlaced.lower() in ['yes', 'true', '1'] else 'p'
logger.debug(f"[{self.file_path.name}] Using interlaced: {interlaced!r} -> scan_type={scan_type!r}")
else:
# Default to progressive if no information available
scan_type = 'p'
logger.debug(f"[{self.file_path.name}] No scan type info, defaulting to progressive")
# Calculate effective height for frame class determination # Calculate effective height for frame class determination
aspect_ratio = 16 / 9 aspect_ratio = 16 / 9
if height > width: if height > width:
effective_height = height / aspect_ratio effective_height = height / aspect_ratio
else: else:
effective_height = height effective_height = height
# First, try to match width to typical widths # First, try to match width to typical widths
# Use a larger tolerance (10 pixels) to handle cinema/ultrawide aspect ratios
width_matches = [] width_matches = []
for frame_class, info in FRAME_CLASSES.items(): for frame_class, info in FRAME_CLASSES.items():
for tw in info['typical_widths']: for tw in info['typical_widths']:
if abs(width - tw) <= 5 and frame_class.endswith(scan_type): if abs(width - tw) <= 10 and frame_class.endswith(scan_type):
diff = abs(height - info['nominal_height']) diff = abs(height - info['nominal_height'])
width_matches.append((frame_class, diff)) width_matches.append((frame_class, diff))
if width_matches: if width_matches:
# Choose the frame class with the smallest height difference # Choose the frame class with the smallest height difference
width_matches.sort(key=lambda x: x[1]) width_matches.sort(key=lambda x: x[1])
return width_matches[0][0] result = width_matches[0][0]
logger.debug(f"[{self.file_path.name}] Result (width match): {result!r}")
return result
# If no width match, fall back to height-based matching # If no width match, fall back to height-based matching
# First try exact match with standard frame classes # First try exact match with standard frame classes
frame_class = f"{int(round(effective_height))}{scan_type}" frame_class = f"{int(round(effective_height))}{scan_type}"
if frame_class in FRAME_CLASSES: if frame_class in FRAME_CLASSES:
logger.debug(f"[{self.file_path.name}] Result (exact height match): {frame_class!r}")
return frame_class return frame_class
# Find closest standard height match # Find closest standard height match
closest_class = None closest_class = None
min_diff = float('inf') min_diff = float('inf')
@@ -113,12 +135,14 @@ class MediaInfoExtractor:
if diff < min_diff: if diff < min_diff:
min_diff = diff min_diff = diff
closest_class = fc closest_class = fc
# Return closest standard match if within reasonable distance (20 pixels) # Return closest standard match if within reasonable distance (20 pixels)
if closest_class and min_diff <= 20: if closest_class and min_diff <= 20:
logger.debug(f"[{self.file_path.name}] Result (closest match, diff={min_diff}): {closest_class!r}")
return closest_class return closest_class
# For non-standard resolutions, create a custom frame class # For non-standard resolutions, create a custom frame class
logger.debug(f"[{self.file_path.name}] Result (custom/non-standard): {frame_class!r}")
return frame_class return frame_class
@cached_method() @cached_method()
@@ -131,7 +155,7 @@ class MediaInfoExtractor:
if width and height: if width and height:
return width, height return width, height
return None return None
@cached_method() @cached_method()
def extract_aspect_ratio(self) -> str | None: def extract_aspect_ratio(self) -> str | None:
"""Extract video aspect ratio from media info""" """Extract video aspect ratio from media info"""
@@ -165,10 +189,11 @@ class MediaInfoExtractor:
lang_obj = langcodes.Language.get(lang_code.lower()) lang_obj = langcodes.Language.get(lang_code.lower())
alpha3 = lang_obj.to_alpha3() alpha3 = lang_obj.to_alpha3()
langs.append(alpha3) langs.append(alpha3)
except: except (LookupError, ValueError, AttributeError) as e:
# If conversion fails, use the original code # If conversion fails, use the original code
logger.debug(f"Invalid language code '{lang_code}': {e}")
langs.append(lang_code.lower()[:3]) langs.append(lang_code.lower()[:3])
lang_counts = Counter(langs) lang_counts = Counter(langs)
audio_langs = [f"{count}{lang}" if count > 1 else lang for lang, count in lang_counts.items()] audio_langs = [f"{count}{lang}" if count > 1 else lang for lang, count in lang_counts.items()]
return ','.join(audio_langs) return ','.join(audio_langs)
@@ -240,23 +265,31 @@ class MediaInfoExtractor:
@cached_method() @cached_method()
def extract_extension(self) -> str | None: def extract_extension(self) -> str | None:
"""Extract file extension based on container format""" """Extract file extension based on container format.
Uses MediaInfo's format field to determine the appropriate file extension.
Handles special cases like Matroska 3D (mk3d vs mkv).
Returns:
File extension (e.g., "mp4", "mkv") or None if format is unknown
"""
if not self.media_info: if not self.media_info:
return None return None
general_track = next((t for t in self.media_info.tracks if t.track_type == 'General'), None) general_track = next((t for t in self.media_info.tracks if t.track_type == 'General'), None)
if not general_track: if not general_track:
return None return None
format_ = getattr(general_track, 'format', None) format_ = getattr(general_track, 'format', None)
if format_ in self._format_to_extensions: if not format_:
exts = self._format_to_extensions[format_] return None
if format_ == 'Matroska':
if self.is_3d() and 'mk3d' in exts: # Use the constants function to get extension from format
return 'mk3d' ext = get_extension_from_format(format_)
else:
return 'mkv' # Special case: Matroska 3D uses mk3d extension
else: if ext == 'mkv' and self.is_3d():
return exts[0] if exts else None return 'mk3d'
return None
return ext
@cached_method() @cached_method()
def extract_3d_layout(self) -> str | None: def extract_3d_layout(self) -> str | None:
@@ -264,4 +297,50 @@ class MediaInfoExtractor:
if not self.is_3d(): if not self.is_3d():
return None return None
stereoscopic = getattr(self.video_tracks[0], 'stereoscopic', None) stereoscopic = getattr(self.video_tracks[0], 'stereoscopic', None)
return stereoscopic if stereoscopic else None return stereoscopic if stereoscopic else None
@cached_method()
def extract_interlaced(self) -> bool | None:
"""Determine if the video is interlaced.
Returns:
True: Video is interlaced
False: Video is progressive (explicitly set)
None: Information not available in MediaInfo
"""
if not self.video_tracks:
logger.debug(f"[{self.file_path.name}] Interlaced detection: No video tracks")
return None
scan_type_attr = getattr(self.video_tracks[0], 'scan_type', None)
interlaced = getattr(self.video_tracks[0], 'interlaced', None)
logger.debug(f"[{self.file_path.name}] Interlaced detection:")
logger.debug(f"[{self.file_path.name}] scan_type: {scan_type_attr!r} (type: {type(scan_type_attr).__name__})")
logger.debug(f"[{self.file_path.name}] interlaced: {interlaced!r} (type: {type(interlaced).__name__})")
# Check scan_type attribute first (e.g., "Interlaced", "Progressive", "MBAFF")
if scan_type_attr and isinstance(scan_type_attr, str):
scan_lower = scan_type_attr.lower()
if 'interlaced' in scan_lower or 'mbaff' in scan_lower:
logger.debug(f"[{self.file_path.name}] Result: True (from scan_type={scan_type_attr!r})")
return True
elif 'progressive' in scan_lower:
logger.debug(f"[{self.file_path.name}] Result: False (from scan_type={scan_type_attr!r})")
return False
# If scan_type has some other value, fall through to check interlaced
logger.debug(f"[{self.file_path.name}] scan_type unrecognized, checking interlaced attribute")
# Check interlaced attribute (e.g., "Yes", "No")
if interlaced and isinstance(interlaced, str):
interlaced_lower = interlaced.lower()
if interlaced_lower in ['yes', 'true', '1']:
logger.debug(f"[{self.file_path.name}] Result: True (from interlaced={interlaced!r})")
return True
elif interlaced_lower in ['no', 'false', '0']:
logger.debug(f"[{self.file_path.name}] Result: False (from interlaced={interlaced!r})")
return False
# No information available
logger.debug(f"[{self.file_path.name}] Result: None (no information available)")
return None

View File

@@ -1,50 +1,110 @@
"""Embedded metadata extractor using Mutagen.
This module provides the MetadataExtractor class for reading embedded
metadata tags from media files using the Mutagen library.
"""
import mutagen import mutagen
import logging
from pathlib import Path from pathlib import Path
from ..constants import MEDIA_TYPES from ..constants import MEDIA_TYPES
from ..decorators import cached_method from ..cache import cached_method, Cache
logger = logging.getLogger(__name__)
class MetadataExtractor: class MetadataExtractor:
"""Class to extract information from file metadata""" """Extractor for embedded metadata tags from media files.
def __init__(self, file_path: Path): This class uses the Mutagen library to read embedded metadata tags
such as title, artist, and duration. Falls back to MIME type detection
when Mutagen cannot read the file.
Attributes:
file_path: Path object pointing to the file
info: Mutagen file info object, or None if file cannot be read
_cache: Internal cache for method results
Example:
>>> from pathlib import Path
>>> extractor = MetadataExtractor(Path("movie.mkv"))
>>> title = extractor.extract_title()
>>> duration = extractor.extract_duration()
"""
def __init__(self, file_path: Path, use_cache: bool = True):
"""Initialize the MetadataExtractor.
Args:
file_path: Path object pointing to the media file
use_cache: Whether to use caching (default: True)
"""
self.file_path = file_path self.file_path = file_path
self._cache = {} # Internal cache for method results self.cache = Cache() if use_cache else None # Singleton cache for @cached_method decorator
self.settings = None # Will be set by Settings singleton if needed
self._cache: dict[str, any] = {} # Internal cache for method results
try: try:
self.info = mutagen.File(file_path) # type: ignore self.info = mutagen.File(file_path) # type: ignore
except Exception: except Exception as e:
logger.debug(f"Failed to read metadata from {file_path}: {e}")
self.info = None self.info = None
@cached_method() @cached_method()
def extract_title(self) -> str | None: def extract_title(self) -> str | None:
"""Extract title from metadata""" """Extract title from embedded metadata tags.
Returns:
Title string if found in metadata, None otherwise
"""
if self.info: if self.info:
return getattr(self.info, 'title', None) or getattr(self.info, 'get', lambda x, default=None: default)('title', [None])[0] # type: ignore return getattr(self.info, 'title', None) or getattr(self.info, 'get', lambda x, default=None: default)('title', [None])[0] # type: ignore
return None return None
@cached_method() @cached_method()
def extract_duration(self) -> float | None: def extract_duration(self) -> float | None:
"""Extract duration from metadata""" """Extract duration from metadata.
Returns:
Duration in seconds as a float, or None if not available
"""
if self.info: if self.info:
return getattr(self.info, 'length', None) return getattr(self.info, 'length', None)
return None return None
@cached_method() @cached_method()
def extract_artist(self) -> str | None: def extract_artist(self) -> str | None:
"""Extract artist from metadata""" """Extract artist from embedded metadata tags.
Returns:
Artist string if found in metadata, None otherwise
"""
if self.info: if self.info:
return getattr(self.info, 'artist', None) or getattr(self.info, 'get', lambda x, default=None: default)('artist', [None])[0] # type: ignore return getattr(self.info, 'artist', None) or getattr(self.info, 'get', lambda x, default=None: default)('artist', [None])[0] # type: ignore
return None return None
@cached_method() @cached_method()
def extract_meta_type(self) -> str: def extract_meta_type(self) -> str:
"""Extract meta type from metadata""" """Extract metadata container type.
Returns the Mutagen class name (e.g., "FLAC", "MP4") if available,
otherwise falls back to MIME type detection.
Returns:
Container type name, or "Unknown" if cannot be determined
"""
if self.info: if self.info:
return type(self.info).__name__ return type(self.info).__name__
return self._detect_by_mime() return self._detect_by_mime()
def _detect_by_mime(self) -> str: def _detect_by_mime(self) -> str:
"""Detect meta type by MIME""" """Detect metadata type by MIME type.
Uses python-magic library to detect file MIME type and maps it
to a metadata container type.
Returns:
Container type name based on MIME type, or "Unknown" if detection fails
"""
try: try:
import magic import magic
mime = magic.from_file(str(self.file_path), mime=True) mime = magic.from_file(str(self.file_path), mime=True)
@@ -52,5 +112,6 @@ class MetadataExtractor:
if info['mime'] == mime: if info['mime'] == mime:
return info['meta_type'] return info['meta_type']
return 'Unknown' return 'Unknown'
except Exception: except Exception as e:
logger.debug(f"Failed to detect MIME type for {self.file_path}: {e}")
return 'Unknown' return 'Unknown'

View File

@@ -13,10 +13,11 @@ from ..settings import Settings
class TMDBExtractor: class TMDBExtractor:
"""Class to extract TMDB movie information""" """Class to extract TMDB movie information"""
def __init__(self, file_path: Path): def __init__(self, file_path: Path, use_cache: bool = True):
self.file_path = file_path self.file_path = file_path
self.cache = Cache() self.cache = Cache() if use_cache else None # Singleton cache
self.ttl_seconds = Settings().get("cache_ttl_extractors", 21600) self.settings = Settings() # Singleton settings
self.ttl_seconds = self.settings.get("cache_ttl_extractors", 21600)
self._movie_db_info = None self._movie_db_info = None
def _get_cached_data(self, cache_key: str) -> Optional[Dict[str, Any]]: def _get_cached_data(self, cache_key: str) -> Optional[Dict[str, Any]]:
@@ -50,7 +51,8 @@ class TMDBExtractor:
response = requests.get(url, headers=headers, params=params, timeout=10) response = requests.get(url, headers=headers, params=params, timeout=10)
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
except (requests.RequestException, ValueError): except (requests.RequestException, ValueError) as e:
logging.warning(f"TMDB API request failed for {url}: {e}")
return None return None
def _search_movie_by_title_year(self, title: str, year: Optional[str] = None) -> Optional[Dict[str, Any]]: def _search_movie_by_title_year(self, title: str, year: Optional[str] = None) -> Optional[Dict[str, Any]]:
@@ -162,12 +164,16 @@ class TMDBExtractor:
filename_extractor = FilenameExtractor(self.file_path) filename_extractor = FilenameExtractor(self.file_path)
title = filename_extractor.extract_title() title = filename_extractor.extract_title()
year = filename_extractor.extract_year() year = filename_extractor.extract_year()
if title: if title:
movie_data = self._search_movie_by_title_year(title, year) search_result = self._search_movie_by_title_year(title, year)
if movie_data: if search_result and search_result.get('id'):
self._movie_db_info = movie_data # Fetch full movie details using the ID from search results
return movie_data movie_id = search_result['id']
movie_data = self._get_movie_details(movie_id)
if movie_data:
self._movie_db_info = movie_data
return movie_data
self._movie_db_info = None self._movie_db_info = None
return None return None
@@ -249,6 +255,13 @@ class TMDBExtractor:
return ', '.join(genre['name'] for genre in movie_info['genres']) return ', '.join(genre['name'] for genre in movie_info['genres'])
return None return None
def extract_production_countries(self) -> Optional[str]:
    """Extract TMDB production countries.

    Returns:
        Comma-separated country names (joined with ', '), or None when
        movie info is unavailable or has no 'production_countries' entry.
    """
    movie_info = self._get_movie_info()
    if movie_info and movie_info.get('production_countries'):
        return ', '.join(country['name'] for country in movie_info['production_countries'])
    return None
def extract_poster_path(self) -> Optional[str]: def extract_poster_path(self) -> Optional[str]:
"""Extract TMDB poster path""" """Extract TMDB poster path"""
movie_info = self._get_movie_info() movie_info = self._get_movie_info()
@@ -279,5 +292,6 @@ class TMDBExtractor:
# Cache image # Cache image
local_path = self.cache.set_image(cache_key, image_data, self.ttl_seconds) local_path = self.cache.set_image(cache_key, image_data, self.ttl_seconds)
return str(local_path) if local_path else None return str(local_path) if local_path else None
except requests.RequestException: except requests.RequestException as e:
logging.warning(f"Failed to download poster from {poster_url}: {e}")
return None return None

View File

@@ -1 +1,73 @@
# Formatters package """Formatters package - provides value formatting for display.
This package contains various formatter classes that transform raw values
into display-ready strings with optional styling.
All formatters should inherit from the Formatter ABC defined in base.py.
"""
from .base import (
Formatter,
DataFormatter,
TextFormatter as TextFormatterBase,
MarkupFormatter,
CompositeFormatter
)
from .text_formatter import TextFormatter
from .duration_formatter import DurationFormatter
from .size_formatter import SizeFormatter
from .date_formatter import DateFormatter
from .extension_formatter import ExtensionFormatter
from .resolution_formatter import ResolutionFormatter
from .track_formatter import TrackFormatter
from .special_info_formatter import SpecialInfoFormatter
# Decorator instances
from .date_decorators import date_decorators, DateDecorators
from .special_info_decorators import special_info_decorators, SpecialInfoDecorators
from .text_decorators import text_decorators, TextDecorators
from .conditional_decorators import conditional_decorators, ConditionalDecorators
from .size_decorators import size_decorators, SizeDecorators
from .extension_decorators import extension_decorators, ExtensionDecorators
from .duration_decorators import duration_decorators, DurationDecorators
from .resolution_decorators import resolution_decorators, ResolutionDecorators
from .track_decorators import track_decorators, TrackDecorators
__all__ = [
# Base classes
'Formatter',
'DataFormatter',
'TextFormatterBase',
'MarkupFormatter',
'CompositeFormatter',
# Concrete formatters
'TextFormatter',
'DurationFormatter',
'SizeFormatter',
'DateFormatter',
'ExtensionFormatter',
'ResolutionFormatter',
'TrackFormatter',
'SpecialInfoFormatter',
# Decorator instances and classes
'date_decorators',
'DateDecorators',
'special_info_decorators',
'SpecialInfoDecorators',
'text_decorators',
'TextDecorators',
'conditional_decorators',
'ConditionalDecorators',
'size_decorators',
'SizeDecorators',
'extension_decorators',
'ExtensionDecorators',
'duration_decorators',
'DurationDecorators',
'resolution_decorators',
'ResolutionDecorators',
'track_decorators',
'TrackDecorators',
]

148
renamer/formatters/base.py Normal file
View File

@@ -0,0 +1,148 @@
"""Base classes for formatters.
This module defines the Formatter Abstract Base Class (ABC) that all formatters
should inherit from. This ensures a consistent interface and enables type checking.
"""
from abc import ABC, abstractmethod
from typing import Any
class Formatter(ABC):
    """Common interface for every formatter in this package.

    A formatter turns a raw value into a display-ready string. Three
    families build on this base:

    1. Data formatters  - convert raw data (e.g. bytes -> "1.2 GB")
    2. Text formatters  - transform text content (e.g. case changes)
    3. Markup formatters - add visual styling (e.g. bold, colors)

    Implementations expose ``format`` as a static method so formatters can
    be used without instantiation and composed into pipelines.

    Example:
        class MyFormatter(Formatter):
            @staticmethod
            def format(value: Any) -> str:
                return str(value).upper()
    """

    @staticmethod
    @abstractmethod
    def format(value: Any) -> str:
        """Return the display string for *value*.

        Every concrete formatter must implement this single entry point.

        Args:
            value: Raw value to render; the accepted type is defined by
                each concrete formatter.

        Returns:
            The formatted string representation.

        Raises:
            ValueError: If the value cannot be formatted.
            TypeError: If the value type is incompatible.

        Example:
            >>> class SizeFormatter(Formatter):
            ...     @staticmethod
            ...     def format(value: int) -> str:
            ...         return f"{value / 1024:.1f} KB"
            >>> SizeFormatter.format(2048)
            '2.0 KB'
        """
class DataFormatter(Formatter):
    """Base class for formatters that convert raw data values.

    Covers transformations such as:
    - file sizes (bytes to "1.2 GB")
    - durations (seconds to "1h 23m")
    - dates (timestamp to "2024-01-15")
    - resolutions (width/height to "1920x1080")

    Runs first in the formatting pipeline, ahead of text transforms
    and markup styling.
    """
class TextFormatter(Formatter):
    """Base class for formatters that transform plain text content.

    Covers transformations such as case changes (uppercase, lowercase,
    camelcase), replacements, and truncation — no markup is added.

    Sits in the middle of the pipeline: after data formatters, before
    markup formatters.
    """
class MarkupFormatter(Formatter):
    """Base class for formatters that add visual styling via markup tags.

    Covers styling such as colors ([red]text[/red]), styles
    ([bold]text[/bold]) and links ([link=url]text[/link]).

    Runs last in the formatting pipeline, once all data and text
    transformations are finished.
    """
class CompositeFormatter(Formatter):
    """Chain several formatters and apply them one after another.

    Useful for building multi-step formatting pipelines where the output
    of one formatter feeds the next.

    Example:
        >>> formatters = [SizeFormatter, BoldFormatter, GreenFormatter]
        >>> composite = CompositeFormatter(formatters)
        >>> composite.format(1024)
        '[bold green]1.0 KB[/bold green]'

    Attributes:
        formatters: Ordered list of formatter callables to apply.
    """

    def __init__(self, formatters: list[callable]):
        """Store the ordered chain of formatter callables.

        Args:
            formatters: Callables applied in list order.
        """
        self.formatters = formatters

    def format(self, value: Any) -> str:
        """Run *value* through every formatter in order.

        Args:
            value: Initial value fed to the first formatter.

        Returns:
            The output of the last formatter in the chain.

        Raises:
            Exception: Propagated unchanged from any formatter in the chain.
        """
        current = value
        for step in self.formatters:
            current = step(current)
        return current

View File

@@ -1,15 +1,23 @@
from .text_formatter import TextFormatter from .text_formatter import TextFormatter
from renamer.views.posters import AsciiPosterRenderer, ViuPosterRenderer, RichPixelsPosterRenderer
from typing import Union
import os import os
class CatalogFormatter: class CatalogFormatter:
"""Formatter for catalog mode display""" """Formatter for catalog mode display"""
def __init__(self, extractor): def __init__(self, extractor, settings=None):
self.extractor = extractor self.extractor = extractor
self.settings = settings
def format_catalog_info(self) -> str: def format_catalog_info(self) -> tuple[str, Union[str, object]]:
"""Format catalog information for display""" """Format catalog information for display.
Returns:
Tuple of (info_text, poster_content)
poster_content can be a string or Rich Renderable object
"""
lines = [] lines = []
# Title # Title
@@ -49,59 +57,70 @@ class CatalogFormatter:
if genres: if genres:
lines.append(f"{TextFormatter.bold('Genres:')} {genres}") lines.append(f"{TextFormatter.bold('Genres:')} {genres}")
# Poster # Countries
poster_image_path = self.extractor.tmdb_extractor.extract_poster_image_path() countries = self.extractor.get("production_countries", "TMDB")
if poster_image_path: if countries:
lines.append(f"{TextFormatter.bold('Poster:')}") lines.append(f"{TextFormatter.bold('Countries:')} {countries}")
lines.append(self._display_poster(poster_image_path))
else: # Render text content with Rich markup
poster_path = self.extractor.get("poster_path", "TMDB") text_content = "\n\n".join(lines) if lines else "No catalog information available"
if poster_path:
lines.append(f"{TextFormatter.bold('Poster:')} {poster_path} (not cached yet)")
full_text = "\n\n".join(lines) if lines else "No catalog information available"
# Render markup to ANSI
from rich.console import Console from rich.console import Console
from io import StringIO from io import StringIO
console = Console(file=StringIO(), width=120, legacy_windows=False)
console.print(full_text, markup=True)
return console.file.getvalue()
def _display_poster(self, image_path: str) -> str: console = Console(file=StringIO(), width=120, legacy_windows=False)
"""Display poster image in terminal using simple ASCII art""" console.print(text_content, markup=True)
try: rendered_text = console.file.getvalue()
from PIL import Image
import os # Get poster separately
poster_content = self.get_poster()
if not os.path.exists(image_path):
return f"Image file not found: {image_path}" return rendered_text, poster_content
# Open and resize image def get_poster(self) -> Union[str, object]:
img = Image.open(image_path).convert('L').resize((80, 40), Image.Resampling.LANCZOS) """Get poster content for separate display.
# ASCII characters from dark to light Returns:
ascii_chars = '@%#*+=-:. ' Poster content (string or Rich Renderable) or empty string if no poster
"""
# Convert to ASCII poster_mode = self.settings.get("poster", "no") if self.settings else "no"
pixels = img.getdata()
width, height = img.size if poster_mode == "no":
return ""
ascii_art = []
for y in range(0, height, 2): # Skip every other row for aspect ratio poster_image_path = self.extractor.tmdb_extractor.extract_poster_image_path()
row = []
for x in range(width): if poster_image_path:
# Average of two rows for better aspect return self._display_poster(poster_image_path, poster_mode)
pixel1 = pixels[y * width + x] if y < height else 255 else:
pixel2 = pixels[(y + 1) * width + x] if y + 1 < height else 255 # Poster path not cached yet
avg = (pixel1 + pixel2) // 2 poster_path = self.extractor.get("poster_path", "TMDB")
char = ascii_chars[avg * len(ascii_chars) // 256] if poster_path:
row.append(char) return f"{TextFormatter.bold('Poster:')} {poster_path} (not cached yet)"
ascii_art.append(''.join(row)) return ""
return '\n'.join(ascii_art) def _display_poster(self, image_path: str, mode: str) -> Union[str, object]:
"""Display poster image based on mode setting.
except ImportError:
return f"Image at {image_path} (PIL not available)" Args:
except Exception as e: image_path: Path to the poster image
return f"Failed to display image at {image_path}: {e}" mode: Display mode - "pseudo" for ASCII art, "viu", "richpixels"
Returns:
Rendered poster (string or Rich Renderable object)
"""
if not os.path.exists(image_path):
return f"Image file not found: {image_path}"
# Select renderer based on mode
if mode == "viu":
renderer = ViuPosterRenderer()
elif mode == "pseudo":
renderer = AsciiPosterRenderer()
elif mode == "richpixels":
renderer = RichPixelsPosterRenderer()
else:
return f"Unknown poster mode: {mode}"
# Render the poster
return renderer.render(image_path, width=40)

View File

@@ -0,0 +1,117 @@
"""Conditional formatting decorators.
Provides decorators for conditional formatting (wrap, replace_slashes, default):
@conditional_decorators.wrap("[", "]")
def get_order(self):
return self.extractor.get('order')
"""
from functools import wraps
from typing import Callable, Any
class ConditionalDecorators:
    """Conditional formatting decorators (wrap, replace_slashes, default)."""

    @staticmethod
    def wrap(left: str, right: str = "") -> Callable:
        """Decorator to wrap value with delimiters if it exists.

        Can be used for prefix-only (right=""), suffix-only (left=""), or both.
        Supports format string placeholders that will be filled from function arguments.

        Usage:
            @conditional_decorators.wrap("[", "]")
            def get_order(self):
                return self.extractor.get('order')

            # Prefix only
            @conditional_decorators.wrap(" ")
            def get_source(self):
                return self.extractor.get('source')

            # Suffix only
            @conditional_decorators.wrap("", ",")
            def get_hdr(self):
                return self.extractor.get('hdr')

            # With placeholders
            @conditional_decorators.wrap("Track {index}: ")
            def get_track(self, data, index):
                return data
        """
        def decorator(func: Callable) -> Callable:
            # Inspect the signature once at decoration time: the original
            # re-imported `inspect` and re-parsed the signature on every
            # call, which is wasted work since it never changes per call.
            import inspect
            param_names = list(inspect.signature(func).parameters.keys())

            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                # Empty/None results are swallowed: no delimiters emitted.
                if not result:
                    return ""
                # Build format arguments from positional args beyond the
                # first two (self, data/track/value), mapped to their
                # parameter names, plus any explicit keyword args.
                format_kwargs = {}
                if len(args) > 2:  # self, data, index, ...
                    for i, param_name in enumerate(param_names[2:], start=2):
                        if i < len(args):
                            format_kwargs[param_name] = args[i]
                # Also add explicit kwargs
                format_kwargs.update(kwargs)
                # Fill placeholders only when arguments exist; otherwise the
                # delimiters are used literally (so "{x}" survives untouched).
                formatted_left = left.format(**format_kwargs) if format_kwargs else left
                formatted_right = right.format(**format_kwargs) if format_kwargs else right
                return f"{formatted_left}{result}{formatted_right}"
            return wrapper
        return decorator

    @staticmethod
    def replace_slashes() -> Callable:
        """Decorator to replace forward and back slashes with dashes.

        Usage:
            @conditional_decorators.replace_slashes()
            def get_title(self):
                return self.extractor.get('title')
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if result:
                    return str(result).replace("/", "-").replace("\\", "-")
                # Falsy results pass through as "" (never None).
                return result or ""
            return wrapper
        return decorator

    @staticmethod
    def default(default_value: Any) -> Callable:
        """Decorator to provide a default value if result is None or empty.

        NOTE: It's better to handle defaults in the extractor itself rather than
        using this decorator. This decorator should only be used when the extractor
        cannot provide a sensible default.

        Usage:
            @conditional_decorators.default("Unknown")
            def get_value(self):
                return self.extractor.get('value')
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> Any:
                result = func(*args, **kwargs)
                return result if result else default_value
            return wrapper
        return decorator


# Singleton instance
conditional_decorators = ConditionalDecorators()

View File

@@ -0,0 +1,37 @@
"""Date formatting decorators.
Provides decorator versions of DateFormatter methods for cleaner code:
@date_decorators.year()
def get_year(self):
return self.extractor.get('year')
"""
from functools import wraps
from typing import Callable
from .date_formatter import DateFormatter
class DateDecorators:
    """Decorators that render date/time values for display."""

    @staticmethod
    def modification_date() -> Callable:
        """Decorator that formats a raw modification timestamp.

        Usage:
            @date_decorators.modification_date()
            def get_mtime(self):
                return self.file_path.stat().st_mtime
        """
        def decorator(target: Callable) -> Callable:
            @wraps(target)
            def inner(*args, **kwargs) -> str:
                # Delegate the actual rendering to DateFormatter.
                return DateFormatter.format_modification_date(target(*args, **kwargs))
            return inner
        return decorator


# Singleton instance
date_decorators = DateDecorators()

View File

@@ -0,0 +1,42 @@
"""Duration formatting decorators.
Provides decorator versions of DurationFormatter methods.
"""
from functools import wraps
from typing import Callable
from .duration_formatter import DurationFormatter
class DurationDecorators:
    """Decorators that render duration values for display."""

    @staticmethod
    def duration_full() -> Callable:
        """Decorator to format duration in full format (HH:MM:SS)."""
        def decorator(target: Callable) -> Callable:
            @wraps(target)
            def inner(*args, **kwargs):
                value = target(*args, **kwargs)
                # Falsy durations (None, 0, "") render as an empty string.
                return DurationFormatter.format_full(value) if value else ""
            return inner
        return decorator

    @staticmethod
    def duration_short() -> Callable:
        """Decorator to format duration in short format."""
        def decorator(target: Callable) -> Callable:
            @wraps(target)
            def inner(*args, **kwargs):
                value = target(*args, **kwargs)
                # Same falsy-value handling as duration_full.
                return DurationFormatter.format_short(value) if value else ""
            return inner
        return decorator


# Singleton instance
duration_decorators = DurationDecorators()

View File

@@ -0,0 +1,29 @@
"""Extension formatting decorators.
Provides decorator versions of ExtensionFormatter methods.
"""
from functools import wraps
from typing import Callable
from .extension_formatter import ExtensionFormatter
class ExtensionDecorators:
    """Decorators that render file-extension information for display."""

    @staticmethod
    def extension_info() -> Callable:
        """Decorator to format extension information."""
        def decorator(target: Callable) -> Callable:
            @wraps(target)
            def inner(*args, **kwargs):
                value = target(*args, **kwargs)
                # Falsy extensions render as an empty string.
                return ExtensionFormatter.format_extension_info(value) if value else ""
            return inner
        return decorator


# Singleton instance
extension_decorators = ExtensionDecorators()

View File

@@ -1,119 +0,0 @@
from .text_formatter import TextFormatter
from .duration_formatter import DurationFormatter
from .size_formatter import SizeFormatter
from .date_formatter import DateFormatter
from .extension_formatter import ExtensionFormatter
from .resolution_formatter import ResolutionFormatter
from .track_formatter import TrackFormatter
from .special_info_formatter import SpecialInfoFormatter
import logging
import inspect
import os
# Set up logging conditionally
if os.getenv('FORMATTER_LOG', '0') == '1':
logging.basicConfig(filename='formatter.log', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
else:
logging.basicConfig(level=logging.CRITICAL) # Disable logging
class FormatterApplier:
    """Class to apply multiple formatters in correct order.

    Formatters are always run in the fixed pipeline order defined by
    FORMATTER_ORDER (data -> text -> markup), regardless of the order a
    caller lists them in, so markup always wraps the final text.
    """
    # Define the global order of all formatters
    FORMATTER_ORDER = [
        # Data formatters first (transform raw data)
        DurationFormatter.format_seconds,
        DurationFormatter.format_hhmmss,
        DurationFormatter.format_hhmm,
        DurationFormatter.format_full,
        SizeFormatter.format_size,
        SizeFormatter.format_size_full,
        DateFormatter.format_modification_date,
        DateFormatter.format_year,
        ExtensionFormatter.format_extension_info,
        ResolutionFormatter.format_resolution_dimensions,
        TrackFormatter.format_video_track,
        TrackFormatter.format_audio_track,
        TrackFormatter.format_subtitle_track,
        SpecialInfoFormatter.format_special_info,
        SpecialInfoFormatter.format_database_info,
        # Text formatters second (transform text content)
        TextFormatter.uppercase,
        TextFormatter.lowercase,
        TextFormatter.camelcase,
        # Markup formatters last (add visual styling)
        TextFormatter.bold,
        TextFormatter.italic,
        TextFormatter.underline,
        TextFormatter.bold_green,
        TextFormatter.bold_cyan,
        TextFormatter.bold_magenta,
        TextFormatter.bold_yellow,
        TextFormatter.green,
        TextFormatter.yellow,
        TextFormatter.magenta,
        TextFormatter.cyan,
        TextFormatter.red,
        TextFormatter.blue,
        TextFormatter.grey,
        TextFormatter.dim,
        TextFormatter.format_url,
    ]

    @staticmethod
    def apply_formatters(value, formatters):
        """Apply multiple formatters to value in the global order.

        Args:
            value: The raw value to format.
            formatters: A single formatter callable, a list of them, or a
                falsy value (treated as "no formatting").

        Returns:
            The formatted value, or the string "Unknown" if any formatter
            raised an exception.
        """
        # Accept a single formatter or a list; falsy -> empty pipeline.
        if not isinstance(formatters, list):
            formatters = [formatters] if formatters else []
        # Sort formatters according to the global order
        # (formatters not in FORMATTER_ORDER sort last).
        ordered_formatters = sorted(formatters, key=lambda f: FormatterApplier.FORMATTER_ORDER.index(f) if f in FormatterApplier.FORMATTER_ORDER else len(FormatterApplier.FORMATTER_ORDER))
        # Apply in the ordered sequence
        for formatter in ordered_formatters:
            try:
                old_value = value
                value = formatter(value)
                logging.debug(f"Applied {formatter.__name__ if hasattr(formatter, '__name__') else str(formatter)}: {repr(old_value)} -> {repr(value)}")
            except Exception as e:
                # A failing formatter degrades the value to "Unknown"
                # instead of aborting the whole panel render.
                logging.error(f"Error applying {formatter.__name__ if hasattr(formatter, '__name__') else str(formatter)}: {e}")
                value = "Unknown"
        return value

    @staticmethod
    def format_data_item(item: dict) -> str | None:
        """Apply all formatting to a data item and return the formatted string.

        Expected item keys (all optional): "value", "value_formatters",
        "label", "label_formatters", "display_formatters".
        """
        # Handle value formatting first (e.g., size formatting);
        # the sentinel "Not extracted" is displayed as-is, unformatted.
        value = item.get("value")
        if value is not None and value != "Not extracted":
            value_formatters = item.get("value_formatters", [])
            value = FormatterApplier.apply_formatters(value, value_formatters)
        # Handle label formatting
        label = item.get("label", "")
        if label:
            label_formatters = item.get("label_formatters", [])
            label = FormatterApplier.apply_formatters(label, label_formatters)
        # Create the display string ("Label: value", or label alone when
        # the item carries no value, e.g. section headers)
        if value is not None:
            display_string = f"{label}: {value}"
        else:
            display_string = label
        # Handle display formatting (e.g., color)
        display_formatters = item.get("display_formatters", [])
        display_string = FormatterApplier.apply_formatters(display_string, display_formatters)
        return display_string

    @staticmethod
    def format_data_items(data: list[dict]) -> list:
        """Apply formatting to a list of data items."""
        return [FormatterApplier.format_data_item(item) for item in data]

View File

@@ -1,425 +0,0 @@
from pathlib import Path
from rich.markup import escape
from .size_formatter import SizeFormatter
from .date_formatter import DateFormatter
from .extension_formatter import ExtensionFormatter
from .text_formatter import TextFormatter
from .track_formatter import TrackFormatter
from .resolution_formatter import ResolutionFormatter
from .duration_formatter import DurationFormatter
from .special_info_formatter import SpecialInfoFormatter
from .formatter import FormatterApplier
class MediaFormatter:
"""Class to format media data for display"""
def __init__(self, extractor):
    """Initialize the formatter.

    Args:
        extractor: Object providing get(key, source) lookups across the
            data sources used below (FileInfo, TMDB, MediaInfo, Metadata).
    """
    self.extractor = extractor
def file_info_panel(self) -> str:
    """Return formatted file info panel string.

    Each section method returns a list of already-formatted lines;
    lines are joined within a section, and sections are separated
    by a blank line.
    """
    sections = [
        self.file_info(),
        self.selected_data(),
        self.tmdb_data(),
        self.tracks_info(),
        self.filename_extracted_data(),
        self.metadata_extracted_data(),
        self.mediainfo_extracted_data(),
    ]
    return "\n\n".join("\n".join(section) for section in sections)
def file_info(self) -> list[str]:
    """Return formatted file-info lines (path, size, name, mtime, extension).

    Each dict is a data item consumed by FormatterApplier.format_data_items;
    the first item is the section header. Path and name are escaped so Rich
    markup characters in filenames are displayed literally.
    """
    data = [
        {
            "group": "File Info",
            "label": "File Info",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        },
        {
            "group": "File Info",
            "label": "Path",
            "label_formatters": [TextFormatter.bold],
            "value": escape(str(self.extractor.get("file_path", "FileInfo"))),
            "display_formatters": [TextFormatter.blue],
        },
        {
            "group": "File Info",
            "label": "Size",
            "value": self.extractor.get("file_size", "FileInfo"),
            "value_formatters": [SizeFormatter.format_size_full],
            "display_formatters": [TextFormatter.bold, TextFormatter.green],
        },
        {
            "group": "File Info",
            "label": "Name",
            "label_formatters": [TextFormatter.bold],
            "value": escape(str(self.extractor.get("file_name", "FileInfo"))),
            "display_formatters": [TextFormatter.cyan],
        },
        {
            "group": "File Info",
            "label": "Modified",
            "label_formatters": [TextFormatter.bold],
            "value": self.extractor.get("modification_time", "FileInfo"),
            "value_formatters": [DateFormatter.format_modification_date],
            "display_formatters": [TextFormatter.bold, TextFormatter.magenta],
        },
        {
            "group": "File Info",
            "label": "Extension",
            "label_formatters": [TextFormatter.bold],
            "value": self.extractor.get("extension", "FileInfo"),
            "value_formatters": [ExtensionFormatter.format_extension_info],
            "display_formatters": [TextFormatter.green],
        },
    ]
    return FormatterApplier.format_data_items(data)
def tmdb_data(self) -> list[str]:
    """Return formatted TMDB data.

    Missing TMDB values are shown as the literal placeholder "<None>"
    rather than being omitted, so the panel layout stays stable.
    """
    data = [
        {
            "label": "TMDB Data",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        },
        {
            "label": "ID",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("tmdb_id", "TMDB") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Title",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("title", "TMDB") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Original Title",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("original_title", "TMDB") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Year",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("year", "TMDB") or "<None>",
            "value_formatters": [TextFormatter.yellow,],
        },
        {
            "label": "Database Info",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("movie_db", "TMDB") or "<None>",
            "value_formatters": [SpecialInfoFormatter.format_database_info, TextFormatter.yellow],
        },
        {
            "label": "URL",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("tmdb_url", "TMDB") or "<None>",
            "value_formatters": [TextFormatter.format_url],
        }
    ]
    return FormatterApplier.format_data_items(data)
def tracks_info(self) -> list[str]:
    """Return formatted tracks information.

    Builds one data item per video, audio and subtitle track from
    MediaInfo; audio/subtitle labels are numbered from 1.
    """
    data = [
        {
            "group": "Tracks Info",
            "label": "Tracks Info",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        }
    ]
    # Get video tracks
    video_tracks = self.extractor.get("video_tracks", "MediaInfo") or []
    for item in video_tracks:
        data.append(
            {
                "group": "Tracks Info",
                "label": "Video Track",
                "value": item,
                "value_formatters": TrackFormatter.format_video_track,
                "display_formatters": [TextFormatter.green],
            }
        )
    # Get audio tracks
    audio_tracks = self.extractor.get("audio_tracks", "MediaInfo") or []
    for i, item in enumerate(audio_tracks, start=1):
        data.append(
            {
                "group": "Tracks Info",
                "label": f"Audio Track {i}",
                "value": item,
                "value_formatters": TrackFormatter.format_audio_track,
                "display_formatters": [TextFormatter.yellow],
            }
        )
    # Get subtitle tracks
    subtitle_tracks = self.extractor.get("subtitle_tracks", "MediaInfo") or []
    for i, item in enumerate(subtitle_tracks, start=1):
        data.append(
            {
                "group": "Tracks Info",
                "label": f"Subtitle Track {i}",
                "value": item,
                "value_formatters": TrackFormatter.format_subtitle_track,
                "display_formatters": [TextFormatter.magenta],
            }
        )
    return FormatterApplier.format_data_items(data)
def metadata_extracted_data(self) -> list[str]:
    """Format metadata extraction data for the metadata panel.

    Values come from the "Metadata" source; missing values are shown
    as the placeholder string "Not extracted" (which format_data_item
    leaves unformatted).
    """
    data = [
        {
            "label": "Metadata Extraction",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        },
        {
            "label": "Title",
            "label_formatters": [TextFormatter.bold],
            "value": self.extractor.get("title", "Metadata") or "Not extracted",
            "display_formatters": [TextFormatter.grey],
        },
        {
            "label": "Duration",
            "label_formatters": [TextFormatter.bold],
            "value": self.extractor.get("duration", "Metadata") or "Not extracted",
            "value_formatters": [DurationFormatter.format_full],
            "display_formatters": [TextFormatter.grey],
        },
        {
            "label": "Artist",
            "label_formatters": [TextFormatter.bold],
            "value": self.extractor.get("artist", "Metadata") or "Not extracted",
            "display_formatters": [TextFormatter.grey],
        },
    ]
    return FormatterApplier.format_data_items(data)
def mediainfo_extracted_data(self) -> list[str]:
    """Build the formatted rows shown in the mediainfo panel.

    Returns:
        Display-ready strings produced by FormatterApplier.format_data_items.
    """
    # (label, extractor key, optional value formatters)
    field_specs = [
        ("Duration", "duration", [DurationFormatter.format_full]),
        ("Frame Class", "frame_class", None),
        ("Resolution", "resolution", [ResolutionFormatter.format_resolution_dimensions]),
        ("Aspect Ratio", "aspect_ratio", None),
        ("HDR", "hdr", None),
        ("Audio Languages", "audio_langs", None),
        ("Anamorphic", "anamorphic", None),
        ("Extension", "extension", [ExtensionFormatter.format_extension_info]),
        ("3D Layout", "3d_layout", None),
    ]
    rows = [
        {
            "label": "Media Info Extraction",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        }
    ]
    for label, key, value_formatters in field_specs:
        row = {
            "label": label,
            "label_formatters": [TextFormatter.bold],
            # Every MediaInfo field falls back to the same placeholder.
            "value": self.extractor.get(key, "MediaInfo") or "Not extracted",
        }
        if value_formatters is not None:
            row["value_formatters"] = value_formatters
        row["display_formatters"] = [TextFormatter.grey]
        rows.append(row)
    return FormatterApplier.format_data_items(rows)
def filename_extracted_data(self) -> list[str]:
    """Build the formatted rows shown in the filename-extraction panel.

    Returns:
        Display-ready strings produced by FormatterApplier.format_data_items.
    """
    # (label, extractor key, apply "Not extracted" fallback,
    #  optional value formatters, display formatter)
    field_specs = [
        ("Order", "order", True, None, TextFormatter.yellow),
        # Title and Year deliberately carry no fallback text.
        ("Movie title", "title", False, None, TextFormatter.grey),
        ("Year", "year", False, None, TextFormatter.grey),
        ("Video source", "source", True, None, TextFormatter.grey),
        ("Frame class", "frame_class", True, None, TextFormatter.grey),
        ("HDR", "hdr", True, None, TextFormatter.grey),
        ("Audio langs", "audio_langs", True, None, TextFormatter.grey),
        (
            "Special info",
            "special_info",
            True,
            [SpecialInfoFormatter.format_special_info, TextFormatter.blue],
            TextFormatter.grey,
        ),
        ("Movie DB", "movie_db", True, None, TextFormatter.grey),
    ]
    rows = [
        {
            "label": "Filename Extracted Data",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        }
    ]
    for label, key, use_fallback, value_formatters, display in field_specs:
        value = self.extractor.get(key, "Filename")
        if use_fallback:
            value = value or "Not extracted"
        row = {
            "label": label,
            "label_formatters": [TextFormatter.bold],
            "value": value,
        }
        if value_formatters is not None:
            row["value_formatters"] = value_formatters
        row["display_formatters"] = [display]
        rows.append(row)
    return FormatterApplier.format_data_items(rows)
def selected_data(self) -> list[str]:
    """Build the formatted rows shown in the selected-data panel.

    When the FORMATTER_LOG environment variable is set, the raw
    frame_class / audio_langs values (overall and from the Filename
    source) are logged to help debug formatter selection.

    Returns:
        Display-ready strings produced by FormatterApplier.format_data_items.
    """
    import logging
    import os

    if os.getenv("FORMATTER_LOG"):
        # Lazy %-style arguments: the repr() work only happens when the
        # record is actually emitted (fixes eager f-string formatting).
        logging.info(
            "Selected data - frame_class: %r, audio_langs: %r",
            self.extractor.get("frame_class"),
            self.extractor.get("audio_langs"),
        )
        # Also check the values as parsed from the Filename source
        logging.info(
            "From Filename - frame_class: %r, audio_langs: %r",
            self.extractor.get("frame_class", "Filename"),
            self.extractor.get("audio_langs", "Filename"),
        )

    data = [
        {
            "label": "Selected Data",
            "label_formatters": [TextFormatter.bold, TextFormatter.uppercase],
        },
        {
            "label": "Order",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("order") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Title",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("title") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Year",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("year") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Special info",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("special_info") or "<None>",
            "value_formatters": [
                SpecialInfoFormatter.format_special_info,
                TextFormatter.yellow,
            ],
        },
        {
            "label": "Source",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("source") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Frame class",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("frame_class") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "HDR",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("hdr") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Audio langs",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("audio_langs") or "<None>",
            "value_formatters": [TextFormatter.yellow],
        },
        {
            "label": "Database Info",
            "label_formatters": [TextFormatter.bold, TextFormatter.blue],
            "value": self.extractor.get("movie_db") or "<None>",
            "value_formatters": [
                SpecialInfoFormatter.format_database_info,
                TextFormatter.yellow,
            ],
        },
    ]
    return FormatterApplier.format_data_items(data)

View File

@@ -1,37 +0,0 @@
from rich.markup import escape
from .text_formatter import TextFormatter
from .date_formatter import DateFormatter
from .special_info_formatter import SpecialInfoFormatter
class ProposedNameFormatter:
    """Class for formatting proposed filenames.

    Builds a rename line of the form
    ``[order] Title (year) [special] SOURCE [frame,hdr,langs] [dbid].ext``
    from values pulled out of a media extractor.
    """

    def __init__(self, extractor):
        """Initialize with media extractor data.

        Args:
            extractor: object exposing ``get(key)`` for extracted fields
                (order, title, year, source, frame_class, hdr,
                audio_langs, special_info, movie_db, extension).
        """
        # Optional "[NN] " ordering prefix; empty string when absent.
        self.__order = f"[{extractor.get('order')}] " if extractor.get("order") else ""
        # Path separators are not valid in filenames; map them to dashes.
        self.__title = (extractor.get("title") or "Unknown Title").replace("/", "-").replace("\\", "-")
        self.__year = DateFormatter.format_year(extractor.get("year"))
        # Leading space keeps the field self-delimiting when present.
        self.__source = f" {extractor.get('source')}" if extractor.get("source") else ""
        # NOTE(review): a missing frame_class/audio_langs stays None and
        # renders literally as "None" in rename_line — confirm intended.
        self.__frame_class = extractor.get("frame_class") or None
        self.__hdr = f",{extractor.get('hdr')}" if extractor.get("hdr") else ""
        self.__audio_langs = extractor.get("audio_langs") or None
        self.__special_info = f" [{SpecialInfoFormatter.format_special_info(extractor.get('special_info'))}]" if extractor.get("special_info") else ""
        self.__db_info = f" [{SpecialInfoFormatter.format_database_info(extractor.get('movie_db'))}]" if extractor.get("movie_db") else ""
        self.__extension = extractor.get("extension") or "ext"

    def __str__(self) -> str:
        """Convert the proposed name to string"""
        return self.rename_line()

    def rename_line(self) -> str:
        """Return the proposed filename assembled from the cached parts."""
        result = f"{self.__order}{self.__title} {self.__year}{self.__special_info}{self.__source} [{self.__frame_class}{self.__hdr},{self.__audio_langs}]{self.__db_info}.{self.__extension}"
        # Second pass catches separators introduced by formatted fields.
        return result.replace("/", "-").replace("\\", "-")

    def rename_line_formatted(self, file_path) -> str:
        """Format the proposed name for display with color.

        Green when *file_path* already matches the proposed name
        (no rename needed), bold yellow otherwise.
        """
        proposed = escape(str(self))
        if file_path.name == str(self):
            return f">> {TextFormatter.green(proposed)} <<"
        return f">> {TextFormatter.bold_yellow(proposed)} <<"

View File

@@ -0,0 +1,29 @@
"""Resolution formatting decorators.
Provides decorator versions of ResolutionFormatter methods.
"""
from functools import wraps
from typing import Callable
from .resolution_formatter import ResolutionFormatter
class ResolutionDecorators:
    """Resolution formatting decorators."""

    @staticmethod
    def resolution_dimensions() -> Callable:
        """Decorator to format resolution as dimensions (WxH)."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs):
                value = func(*args, **kwargs)
                # Falsy result (None, empty) maps to an empty string.
                return ResolutionFormatter.format_resolution_dimensions(value) if value else ""
            return wrapper
        return decorator


# Singleton instance
resolution_decorators = ResolutionDecorators()

View File

@@ -0,0 +1,42 @@
"""Size formatting decorators.
Provides decorator versions of SizeFormatter methods.
"""
from functools import wraps
from typing import Callable
from .size_formatter import SizeFormatter
class SizeDecorators:
    """Size formatting decorators."""

    @staticmethod
    def size_full() -> Callable:
        """Decorator to format file size in full format."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs):
                size = func(*args, **kwargs)
                # Only None means "no size"; 0 is a valid size and is formatted.
                return "" if size is None else SizeFormatter.format_size_full(size)
            return wrapper
        return decorator

    @staticmethod
    def size_short() -> Callable:
        """Decorator to format file size in short format."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs):
                size = func(*args, **kwargs)
                return "" if size is None else SizeFormatter.format_size_short(size)
            return wrapper
        return decorator


# Singleton instance
size_decorators = SizeDecorators()

View File

@@ -1,6 +1,6 @@
class SizeFormatter: class SizeFormatter:
"""Class for formatting file sizes""" """Class for formatting file sizes"""
@staticmethod @staticmethod
def format_size(bytes_size: int) -> str: def format_size(bytes_size: int) -> str:
"""Format bytes to human readable with unit""" """Format bytes to human readable with unit"""
@@ -9,9 +9,14 @@ class SizeFormatter:
return f"{bytes_size:.1f} {unit}" return f"{bytes_size:.1f} {unit}"
bytes_size /= 1024 bytes_size /= 1024
return f"{bytes_size:.1f} TB" return f"{bytes_size:.1f} TB"
@staticmethod @staticmethod
def format_size_full(bytes_size: int) -> str: def format_size_full(bytes_size: int) -> str:
"""Format size with both human readable and bytes""" """Format size with both human readable and bytes"""
size_formatted = SizeFormatter.format_size(bytes_size) size_formatted = SizeFormatter.format_size(bytes_size)
return f"{size_formatted} ({bytes_size:,} bytes)" return f"{size_formatted} ({bytes_size:,} bytes)"
@staticmethod
def format_size_short(bytes_size: int) -> str:
"""Format size with only human readable"""
return SizeFormatter.format_size(bytes_size)

View File

@@ -0,0 +1,54 @@
"""Special info formatting decorators.
Provides decorator versions of SpecialInfoFormatter methods:
@special_info_decorators.special_info()
def get_special_info(self):
return self.extractor.get('special_info')
"""
from functools import wraps
from typing import Callable
from .special_info_formatter import SpecialInfoFormatter
class SpecialInfoDecorators:
    """Special info and database formatting decorators."""

    @staticmethod
    def special_info() -> Callable:
        """Decorator to format special info lists.

        Usage:
            @special_info_decorators.special_info()
            def get_special_info(self):
                return self.extractor.get('special_info')
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                # Formatter handles empty/None input itself; no guard needed.
                return SpecialInfoFormatter.format_special_info(func(*args, **kwargs))
            return wrapper
        return decorator

    @staticmethod
    def database_info() -> Callable:
        """Decorator to format database info.

        Usage:
            @special_info_decorators.database_info()
            def get_db_info(self):
                return self.extractor.get('movie_db')
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                return SpecialInfoFormatter.format_database_info(func(*args, **kwargs))
            return wrapper
        return decorator


# Singleton instance
special_info_decorators = SpecialInfoDecorators()

View File

@@ -15,21 +15,15 @@ class SpecialInfoFormatter:
"""Format database info dictionary or tuple/list into a string""" """Format database info dictionary or tuple/list into a string"""
import logging import logging
import os import os
if os.getenv("FORMATTER_LOG"):
logging.info(f"format_database_info called with: {database_info!r} (type: {type(database_info)})")
if isinstance(database_info, dict) and 'name' in database_info and 'id' in database_info: if isinstance(database_info, dict) and 'name' in database_info and 'id' in database_info:
db_name = database_info['name'] db_name = database_info['name']
db_id = database_info['id'] db_id = database_info['id']
result = f"{db_name}id-{db_id}" result = f"{db_name}id-{db_id}"
if os.getenv("FORMATTER_LOG"):
logging.info(f"Formatted dict to: {result!r}")
return result return result
elif isinstance(database_info, (tuple, list)) and len(database_info) == 2: elif isinstance(database_info, (tuple, list)) and len(database_info) == 2:
db_name, db_id = database_info db_name, db_id = database_info
result = f"{db_name}id-{db_id}" result = f"{db_name}id-{db_id}"
if os.getenv("FORMATTER_LOG"):
logging.info(f"Formatted tuple/list to: {result!r}")
return result return result
if os.getenv("FORMATTER_LOG"): if os.getenv("FORMATTER_LOG"):
logging.info("Returning 'Unknown'") logging.info("Returning None")
return "Unknown" return None

View File

@@ -0,0 +1,112 @@
"""Text formatting decorators.
Provides decorator versions of TextFormatter methods:
@text_decorators.bold()
def get_title(self):
return self.title
"""
from functools import wraps
from typing import Callable
from .text_formatter import TextFormatter
class TextDecorators:
    """Text styling and color decorators.

    Each method returns a decorator that post-processes the wrapped
    function's return value through the matching TextFormatter method.
    A falsy result (None, "", 0) always yields "" so missing values are
    never styled (previously bold()/italic() only guarded against ""
    and would render None as a styled "None" string).
    """

    @staticmethod
    def bold() -> Callable:
        """Decorator to make text bold."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                # Guard on falsy (was `== ""`) for consistency with the
                # other decorators and to avoid styling "None".
                if not result:
                    return ""
                return TextFormatter.bold(str(result))
            return wrapper
        return decorator

    @staticmethod
    def italic() -> Callable:
        """Decorator to make text italic."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return TextFormatter.italic(str(result))
            return wrapper
        return decorator

    @staticmethod
    def colour(name) -> Callable:
        """Decorator to colour text with the given colour name."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return TextFormatter.colour(name, str(result))
            return wrapper
        return decorator

    @staticmethod
    def uppercase() -> Callable:
        """Decorator to convert text to uppercase."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return TextFormatter.uppercase(str(result))
            return wrapper
        return decorator

    @staticmethod
    def lowercase() -> Callable:
        """Decorator to convert text to lowercase."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return TextFormatter.lowercase(str(result))
            return wrapper
        return decorator

    @staticmethod
    def url() -> Callable:
        """Decorator to format text as a clickable URL."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return TextFormatter.format_url(str(result))
            return wrapper
        return decorator

    @staticmethod
    def escape() -> Callable:
        """Decorator to escape rich markup in text."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs) -> str:
                # Local import keeps rich optional at module-import time.
                from rich.markup import escape
                result = func(*args, **kwargs)
                if not result:
                    return ""
                return escape(str(result))
            return wrapper
        return decorator


# Singleton instance
text_decorators = TextDecorators()

View File

@@ -27,80 +27,45 @@ class TextFormatter:
return ''.join(word.capitalize() for word in text.split()) return ''.join(word.capitalize() for word in text.split())
@staticmethod @staticmethod
def bold_green(text: str) -> str: def colour(colour_name: str, text: str) -> str:
"""Deprecated: Use [TextFormatter.bold, TextFormatter.green] instead""" """Generic method to color text with given colour name."""
import warnings return f"[{colour_name}]{text}[/{colour_name}]"
warnings.warn(
"TextFormatter.bold_green is deprecated. Use [TextFormatter.bold, TextFormatter.green] instead.",
DeprecationWarning,
stacklevel=2
)
return f"[bold green]{text}[/bold green]"
@staticmethod
def bold_cyan(text: str) -> str:
"""Deprecated: Use [TextFormatter.bold, TextFormatter.cyan] instead"""
import warnings
warnings.warn(
"TextFormatter.bold_cyan is deprecated. Use [TextFormatter.bold, TextFormatter.cyan] instead.",
DeprecationWarning,
stacklevel=2
)
return f"[bold cyan]{text}[/bold cyan]"
@staticmethod
def bold_magenta(text: str) -> str:
"""Deprecated: Use [TextFormatter.bold, TextFormatter.magenta] instead"""
import warnings
warnings.warn(
"TextFormatter.bold_magenta is deprecated. Use [TextFormatter.bold, TextFormatter.magenta] instead.",
DeprecationWarning,
stacklevel=2
)
return f"[bold magenta]{text}[/bold magenta]"
@staticmethod
def bold_yellow(text: str) -> str:
"""Deprecated: Use [TextFormatter.bold, TextFormatter.yellow] instead"""
import warnings
warnings.warn(
"TextFormatter.bold_yellow is deprecated. Use [TextFormatter.bold, TextFormatter.yellow] instead.",
DeprecationWarning,
stacklevel=2
)
return f"[bold yellow]{text}[/bold yellow]"
@staticmethod @staticmethod
def green(text: str) -> str: def green(text: str) -> str:
return f"[green]{text}[/green]" return TextFormatter.colour("green", text)
@staticmethod @staticmethod
def yellow(text: str) -> str: def yellow(text: str) -> str:
return f"[yellow]{text}[/yellow]" return TextFormatter.colour("yellow", text)
@staticmethod
def orange(text: str) -> str:
return TextFormatter.colour("orange", text)
@staticmethod @staticmethod
def magenta(text: str) -> str: def magenta(text: str) -> str:
return f"[magenta]{text}[/magenta]" return TextFormatter.colour("magenta", text)
@staticmethod @staticmethod
def cyan(text: str) -> str: def cyan(text: str) -> str:
return f"[cyan]{text}[/cyan]" return TextFormatter.colour("cyan", text)
@staticmethod @staticmethod
def red(text: str) -> str: def red(text: str) -> str:
return f"[red]{text}[/red]" return TextFormatter.colour("red", text)
@staticmethod @staticmethod
def blue(text: str) -> str: def blue(text: str) -> str:
return f"[blue]{text}[/blue]" return TextFormatter.colour("blue", text)
@staticmethod @staticmethod
def grey(text: str) -> str: def grey(text: str) -> str:
return f"[grey]{text}[/grey]" return TextFormatter.colour("grey", text)
@staticmethod @staticmethod
def dim(text: str) -> str: def dim(text: str) -> str:
return f"[dim]{text}[/dim]" return TextFormatter.colour("dimgray", text)
@staticmethod @staticmethod
def link(url: str, text: str | None = None) -> str: def link(url: str, text: str | None = None) -> str:
@@ -115,4 +80,4 @@ class TextFormatter:
if url and url != "<None>" and url.startswith("http"): if url and url != "<None>" and url.startswith("http"):
# Use OSC 8 hyperlink escape sequence for clickable links # Use OSC 8 hyperlink escape sequence for clickable links
return f"\x1b]8;;{url}\x1b\\Open in TMDB\x1b]8;;\x1b\\" return f"\x1b]8;;{url}\x1b\\Open in TMDB\x1b]8;;\x1b\\"
return url return url

View File

@@ -0,0 +1,55 @@
"""Track formatting decorators.
Provides decorator versions of TrackFormatter methods.
"""
from functools import wraps
from typing import Callable
from .track_formatter import TrackFormatter
class TrackDecorators:
    """Track formatting decorators."""

    @staticmethod
    def _make(formatter: Callable) -> Callable:
        """Build a decorator that runs *formatter* on a truthy result."""
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(*args, **kwargs):
                track = func(*args, **kwargs)
                # Falsy track data (None, {}) yields an empty string.
                return formatter(track) if track else ""
            return wrapper
        return decorator

    @staticmethod
    def video_track() -> Callable:
        """Decorator to format video track data."""
        return TrackDecorators._make(TrackFormatter.format_video_track)

    @staticmethod
    def audio_track() -> Callable:
        """Decorator to format audio track data."""
        return TrackDecorators._make(TrackFormatter.format_audio_track)

    @staticmethod
    def subtitle_track() -> Callable:
        """Decorator to format subtitle track data."""
        return TrackDecorators._make(TrackFormatter.format_subtitle_track)


# Singleton instance
track_decorators = TrackDecorators()

View File

@@ -7,18 +7,19 @@ class TrackFormatter:
codec = track.get('codec', 'unknown') codec = track.get('codec', 'unknown')
width = track.get('width', '?') width = track.get('width', '?')
height = track.get('height', '?') height = track.get('height', '?')
bitrate = track.get('bitrate') bitrate = track.get('bitrate') # in bps
bitrate_kbps = int(round(bitrate / 1024)) if bitrate else None
fps = track.get('fps') fps = track.get('fps')
profile = track.get('profile') profile = track.get('profile')
video_str = f"{codec} {width}x{height}" video_str = f"{codec} {width}x{height}"
if bitrate: if bitrate_kbps:
video_str += f" {bitrate}bps" video_str += f" {bitrate_kbps}kbps"
if fps: if fps:
video_str += f" {fps}fps" video_str += f" {fps}fps"
if profile: if profile:
video_str += f" ({profile})" video_str += f" ({profile})"
return video_str return video_str
@staticmethod @staticmethod
@@ -27,12 +28,12 @@ class TrackFormatter:
codec = track.get('codec', 'unknown') codec = track.get('codec', 'unknown')
channels = track.get('channels', '?') channels = track.get('channels', '?')
lang = track.get('language', 'und') lang = track.get('language', 'und')
bitrate = track.get('bitrate') bitrate = track.get('bitrate') # in bps
bitrate_kbps = int(round(bitrate / 1024)) if bitrate else None
audio_str = f"{codec} {channels}ch {lang}" audio_str = f"{codec} {channels}ch {lang}"
if bitrate: if bitrate_kbps:
audio_str += f" {bitrate}bps" audio_str += f" {bitrate_kbps}kbps"
return audio_str return audio_str
@staticmethod @staticmethod
@@ -40,5 +41,5 @@ class TrackFormatter:
"""Format a subtitle track dict into a display string""" """Format a subtitle track dict into a display string"""
lang = track.get('language', 'und') lang = track.get('language', 'und')
format = track.get('format', 'unknown') format = track.get('format', 'unknown')
return f"{lang} ({format})" return f"{lang} ({format})"

46
renamer/logging_config.py Normal file
View File

@@ -0,0 +1,46 @@
"""Singleton logging configuration for the renamer application.
This module provides centralized logging configuration that is initialized
once and used throughout the application.
"""
import logging
import os
import threading
class LoggerConfig:
    """Singleton logger configuration.

    __new__ guarantees a single instance via double-checked locking;
    __init__ performs the one-time logging.basicConfig setup. The
    initialization check is held under the same lock so two threads
    constructing concurrently cannot both run basicConfig (the original
    check-then-act on _initialized was unsynchronized).
    """

    _instance = None              # the singleton instance
    _lock = threading.Lock()      # guards both creation and init
    _initialized = False          # True once basicConfig has run

    def __new__(cls):
        """Create or return the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check inside the lock: another thread may have won.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialize logging configuration (only once, thread-safe)."""
        with LoggerConfig._lock:
            if LoggerConfig._initialized:
                return
            # FORMATTER_LOG=1 routes debug output to formatter.log;
            # otherwise fall back to plain INFO-level console logging.
            if os.getenv('FORMATTER_LOG', '0') == '1':
                logging.basicConfig(
                    filename='formatter.log',
                    level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s'
                )
            else:
                logging.basicConfig(level=logging.INFO)
            LoggerConfig._initialized = True


# Initialize logging on import
LoggerConfig()

Some files were not shown because too many files have changed in this diff Show More