From 2dce8079845c603582515f97d903ec4d3eaf2c8e Mon Sep 17 00:00:00 2001 From: sHa Date: Fri, 26 Dec 2025 19:10:49 +0000 Subject: [PATCH] feat: Introduce DefaultExtractor for fallback values and refactor extraction logic in MediaExtractor --- renamer/extractor.py | 259 ++++++++++++---------- renamer/extractors/default_extractor.py | 50 +++++ renamer/extractors/filename_extractor.py | 8 +- renamer/extractors/mediainfo_extractor.py | 6 +- renamer/test/test_filename_extractor.py | 11 +- 5 files changed, 204 insertions(+), 130 deletions(-) create mode 100644 renamer/extractors/default_extractor.py diff --git a/renamer/extractor.py b/renamer/extractor.py index 1d25dce..ea2e3f6 100644 --- a/renamer/extractor.py +++ b/renamer/extractor.py @@ -3,137 +3,160 @@ from .extractors.filename_extractor import FilenameExtractor from .extractors.metadata_extractor import MetadataExtractor from .extractors.mediainfo_extractor import MediaInfoExtractor from .extractors.fileinfo_extractor import FileInfoExtractor +from .extractors.default_extractor import DefaultExtractor class MediaExtractor: """Class to extract various metadata from media files using specialized extractors""" def __init__(self, file_path: Path): - self.file_path = file_path self.filename_extractor = FilenameExtractor(file_path) self.metadata_extractor = MetadataExtractor(file_path) self.mediainfo_extractor = MediaInfoExtractor(file_path) self.fileinfo_extractor = FileInfoExtractor(file_path) - - # Define sources for each data type - self._sources = { - 'title': [ - ('Metadata', lambda: self.metadata_extractor.extract_title()), - ('Filename', lambda: self.filename_extractor.extract_title()) - ], - 'year': [ - ('Filename', lambda: self.filename_extractor.extract_year()) - ], - 'source': [ - ('Filename', lambda: self.filename_extractor.extract_source()) - ], - 'frame_class': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_frame_class()), - ('Filename', lambda: self.filename_extractor.extract_frame_class()) - ], - 'resolution': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_resolution()) - ], - 'aspect_ratio': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_aspect_ratio()) - ], - 'hdr': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()), - ('Filename', lambda: self.filename_extractor.extract_hdr()) - ], - 'movie_db': [ - ('Filename', lambda: self.filename_extractor.extract_movie_db()) - ], - 'audio_langs': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs()), - ('Filename', lambda: self.filename_extractor.extract_audio_langs()) - ], - 'meta_type': [ - ('Metadata', lambda: self.metadata_extractor.extract_meta_type()) - ], - 'file_size': [ - ('FileInfo', lambda: self.fileinfo_extractor.extract_size()) - ], - 'modification_time': [ - ('FileInfo', lambda: self.fileinfo_extractor.extract_modification_time()) - ], - 'file_name': [ - ('FileInfo', lambda: self.fileinfo_extractor.extract_file_name()) - ], - 'file_path': [ - ('FileInfo', lambda: self.fileinfo_extractor.extract_file_path()) - ], - 'extension': [ - ('FileInfo', lambda: self.fileinfo_extractor.extract_extension()) - ], - 'video_tracks': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_video_tracks()) - ], - 'audio_tracks': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_tracks()) - ], - 'subtitle_tracks': [ - ('MediaInfo', lambda: self.mediainfo_extractor.extract_subtitle_tracks()) - ], + self.default_extractor = DefaultExtractor() + + # Extractor mapping + self._extractors = { + "Metadata": self.metadata_extractor, + "Filename": self.filename_extractor, + "MediaInfo": self.mediainfo_extractor, + "FileInfo": self.fileinfo_extractor, + "Default": self.default_extractor, } - - # Conditions for when a value is considered valid - self._conditions = { - 'title': lambda x: x is not None, - 'year': lambda x: x is not None, - 'source': lambda x: x is not None, - 'frame_class': lambda x: x and x != 'Unclassified', - 'resolution': lambda x: x is not None, - 'aspect_ratio': lambda x: x is not None, - 'hdr': lambda x: x is not None, - 'movie_db': lambda x: x is not None, - 'audio_langs': lambda x: x is not None, - 'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']), - 'video_tracks': lambda x: x is not None and len(x) > 0, - 'audio_tracks': lambda x: x is not None and len(x) > 0, - 'subtitle_tracks': lambda x: x is not None and len(x) > 0, + + # Define sources and conditions for each data type + self._data = { + "title": { + "sources": [ + ("Metadata", "extract_title"), + ("Filename", "extract_title"), + ("Default", "extract_title"), + ], + }, + "year": { + "sources": [ + ("Filename", "extract_year"), + ("Default", "extract_year"), + ], + }, + "source": { + "sources": [ + ("Filename", "extract_source"), + ("Default", "extract_source"), + ], + }, + "frame_class": { + "sources": [ + ("MediaInfo", "extract_frame_class"), + ("Filename", "extract_frame_class"), + ("Default", "extract_frame_class"), + ], + }, + "resolution": { + "sources": [ + ("MediaInfo", "extract_resolution"), + ("Default", "extract_resolution"), + ], + }, + "hdr": { + "sources": [ + ("MediaInfo", "extract_hdr"), + ("Filename", "extract_hdr"), + ("Default", "extract_hdr"), + ], + }, + "movie_db": { + "sources": [ + ("Filename", "extract_movie_db"), + ("Default", "extract_movie_db"), + ], + }, + "audio_langs": { + "sources": [ + ("MediaInfo", "extract_audio_langs"), + ("Filename", "extract_audio_langs"), + ("Default", "extract_audio_langs"), + ], + }, + "meta_type": { + "sources": [ + ("Metadata", "extract_meta_type"), + ("Default", "extract_meta_type"), + ], + }, + "file_size": { + "sources": [ + ("FileInfo", "extract_size"), + ("Default", "extract_size"), + ], + }, + "modification_time": { + "sources": [ + ("FileInfo", "extract_modification_time"), + ("Default", "extract_modification_time"), + ], + }, + "file_name": { + "sources": [ + ("FileInfo", "extract_file_name"), + ("Default", "extract_file_name"), + ], + }, + "file_path": { + "sources": [ + ("FileInfo", "extract_file_path"), + ("Default", "extract_file_path"), + ], + }, + "extension": { + "sources": [ + ("FileInfo", "extract_extension"), + ("Default", "extract_extension"), + ], + }, + "video_tracks": { + "sources": [ + ("MediaInfo", "extract_video_tracks"), + ("Default", "extract_video_tracks"), + ], + }, + "audio_tracks": { + "sources": [ + ("MediaInfo", "extract_audio_tracks"), + ("Default", "extract_audio_tracks"), + ], + }, + "subtitle_tracks": { + "sources": [ + ("MediaInfo", "extract_subtitle_tracks"), + ("Default", "extract_subtitle_tracks"), + ], + }, } def get(self, key: str, source: str | None = None): """Get extracted data by key, optionally from specific source""" - if key in self._sources: - condition = self._conditions.get(key, lambda x: x is not None) - - if source: - for src, func in self._sources[key]: - if src.lower() == source.lower(): - val = func() - return val if condition(val) else None - return None # Source not found for this key, return None - else: - # Use fallback: return first valid value - for src, func in self._sources[key]: - val = func() - if condition(val): - return val - return None + if source: + # Specific source requested - find the extractor and call the method directly + for extractor_name, extractor in self._extractors.items(): + if extractor_name.lower() == source.lower(): + method = f"extract_{key}" + if hasattr(extractor, method): + return getattr(extractor, method)() + return None + + # Fallback mode - try sources in order + if key in self._data: + sources = self._data[key]["sources"] else: - # Key not in _sources, try to call extract_ on extractors - extract_method = f'extract_{key}' - extractors = [ - ('MediaInfo', self.mediainfo_extractor), - ('Metadata', self.metadata_extractor), - ('Filename', self.filename_extractor), - ('FileInfo', self.fileinfo_extractor) - ] - - if source: - for src_name, extractor in extractors: - if src_name.lower() == source.lower(): - if hasattr(extractor, extract_method): - val = getattr(extractor, extract_method)() - return val - return None - else: - # Try all extractors in order - for src_name, extractor in extractors: - if hasattr(extractor, extract_method): - val = getattr(extractor, extract_method)() - if val is not None: - return val - return None \ No newline at end of file + # Try extractors in order for unconfigured keys + sources = [(name, f"extract_{key}") for name in ["MediaInfo", "Metadata", "Filename", "FileInfo"]] + + # Try each source in order until a non-None value is found + for src, method in sources: + if src in self._extractors and hasattr(self._extractors[src], method): + val = getattr(self._extractors[src], method)() + if val is not None: + return val + return None diff --git a/renamer/extractors/default_extractor.py b/renamer/extractors/default_extractor.py new file mode 100644 index 0000000..736f940 --- /dev/null +++ b/renamer/extractors/default_extractor.py @@ -0,0 +1,50 @@ +class DefaultExtractor: + """Extractor that provides default fallback values""" + + def extract_title(self): + return "Unknown Title" + + def extract_year(self): + return None + + def extract_source(self): + return None + + def extract_resolution(self): + return None + + def extract_hdr(self): + return None + + def extract_movie_db(self): + return None + + def extract_audio_langs(self): + return None + + def extract_meta_type(self): + return None + + def extract_size(self): + return None + + def extract_modification_time(self): + return None + + def extract_file_name(self): + return None + + def extract_file_path(self): + return None + + def extract_frame_class(self): + return None + + def extract_video_tracks(self): + return [] + + def extract_audio_tracks(self): + return [] + + def extract_subtitle_tracks(self): + return [] \ No newline at end of file diff --git a/renamer/extractors/filename_extractor.py b/renamer/extractors/filename_extractor.py index 3e27119..f86a4bf 100644 --- a/renamer/extractors/filename_extractor.py +++ b/renamer/extractors/filename_extractor.py @@ -12,12 +12,12 @@ class FilenameExtractor: self.file_path = file_path self.file_name = file_path.name - def _get_frame_class_from_height(self, height: int) -> str: + def _get_frame_class_from_height(self, height: int) -> str | None: """Get frame class from video height using FRAME_CLASSES constant""" for frame_class, info in FRAME_CLASSES.items(): if height == info['nominal_height']: return frame_class - return 'Unclassified' + return None def extract_title(self) -> str | None: """Extract movie title from filename""" @@ -126,9 +126,9 @@ class FilenameExtractor: unclassified_indicators = ['SD', 'LQ', 'HD', 'QHD'] for indicator in unclassified_indicators: if re.search(r'\b' + re.escape(indicator) + r'\b', self.file_name, re.IGNORECASE): - return 'Unclassified' + return None - return 'Unclassified' + return None def extract_hdr(self) -> str | None: """Extract HDR information from filename""" diff --git a/renamer/extractors/mediainfo_extractor.py b/renamer/extractors/mediainfo_extractor.py index 5125906..8393f38 100644 --- a/renamer/extractors/mediainfo_extractor.py +++ b/renamer/extractors/mediainfo_extractor.py @@ -26,7 +26,7 @@ class MediaInfoExtractor: for frame_class, info in FRAME_CLASSES.items(): if height == info['nominal_height']: return frame_class - return 'Unclassified' + return None def extract_duration(self) -> float | None: """Extract duration from media info in seconds""" @@ -39,11 +39,11 @@ class MediaInfoExtractor: def extract_frame_class(self) -> str | None: """Extract frame class from media info (480p, 720p, 1080p, etc.)""" if not self.video_tracks: - return 'Unclassified' + return None height = getattr(self.video_tracks[0], 'height', None) if height: return self._get_frame_class_from_height(height) - return 'Unclassified' + return None def extract_resolution(self) -> tuple[int, int] | None: """Extract actual video resolution as (width, height) tuple from media info""" diff --git a/renamer/test/test_filename_extractor.py b/renamer/test/test_filename_extractor.py index 6d1ef79..4c65ac6 100644 --- a/renamer/test/test_filename_extractor.py +++ b/renamer/test/test_filename_extractor.py @@ -62,11 +62,12 @@ def test_extract_frame_class(filename): # Print filename and extracted frame class clearly print(f"\nFilename: \033[1;36m{filename}\033[0m") print(f"Extracted frame_class: \033[1;32m{frame_class}\033[0m") - # Frame class should be a string - assert isinstance(frame_class, str) - # Should be one of the valid frame classes or 'Unclassified' - valid_classes = set(FRAME_CLASSES.keys()) | {'Unclassified'} - assert frame_class in valid_classes + # Frame class should be a string or None + assert frame_class is None or isinstance(frame_class, str) + # Should be one of the valid frame classes or None + if frame_class is not None: + valid_classes = set(FRAME_CLASSES.keys()) + assert frame_class in valid_classes @pytest.mark.parametrize("filename", load_test_filenames())