From c5ab9854409d33d14ef79a957a5d03ac1ed7731b Mon Sep 17 00:00:00 2001
From: sHa
Date: Fri, 26 Dec 2025 14:34:58 +0000
Subject: [PATCH] feat: Add movie database identifier extraction and update related tests

---
Review note: extract_movie_db now returns the last *recognised* database tag
instead of giving up when the final bracketed tag is unrelated (for example a
trailing [Remastered]). The blob hashes in the "index" lines below are the
ones from the original commit and were not recomputed.

 renamer/constants.py                     | 27 ++++++++++++++++++++++++
 renamer/extractor.py                     |  4 ++++
 renamer/extractors/filename_extractor.py | 27 +++++++++++++++++++++++-
 renamer/formatters/media_formatter.py    |  7 ++++++
 renamer/test/filenames.txt               |  6 ++++++
 renamer/test/test_filename_extractor.py  | 19 ++++++++++++++++-
 6 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/renamer/constants.py b/renamer/constants.py
index 5dd3614..7ed711f 100644
--- a/renamer/constants.py
+++ b/renamer/constants.py
@@ -81,3 +81,30 @@ FRAME_CLASSES = {
         "description": "Ultra High Definition (UHD) - 4320p 8K",
     },
 }
+
+MOVIE_DB_DICT = {
+    "tmdb": {
+        "name": "The Movie Database (TMDb)",
+        "description": "Community built movie and TV database",
+        "url": "https://www.themoviedb.org/",
+        "patterns": ["tmdbid", "tmdb", "tmdbid-", "tmdb-"],
+    },
+    "imdb": {
+        "name": "Internet Movie Database (IMDb)",
+        "description": "Comprehensive movie, TV, and celebrity database",
+        "url": "https://www.imdb.com/",
+        "patterns": ["imdbid", "imdb", "imdbid-", "imdb-"],
+    },
+    "trakt": {
+        "name": "Trakt.tv",
+        "description": "Service that integrates with media centers for scrobbling",
+        "url": "https://trakt.tv/",
+        "patterns": ["traktid", "trakt", "traktid-", "trakt-"],
+    },
+    "tvdb": {
+        "name": "The TV Database (TVDB)",
+        "description": "Community driven TV database",
+        "url": "https://thetvdb.com/",
+        "patterns": ["tvdbid", "tvdb", "tvdbid-", "tvdb-"],
+    },
+}
diff --git a/renamer/extractor.py b/renamer/extractor.py
index aaf586d..5e563da 100644
--- a/renamer/extractor.py
+++ b/renamer/extractor.py
@@ -41,6 +41,9 @@ class MediaExtractor:
                 ('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()),
                 ('Filename', lambda: self.filename_extractor.extract_hdr())
             ],
+            'movie_db': [
+                ('Filename', lambda: self.filename_extractor.extract_movie_db())
+            ],
             'audio_langs': [
                 ('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs())
             ],
@@ -82,6 +85,7 @@ class MediaExtractor:
             'resolution': lambda x: x is not None,
             'aspect_ratio': lambda x: x is not None,
             'hdr': lambda x: x is not None,
+            'movie_db': lambda x: x is not None,
             'audio_langs': lambda x: x is not None,
             'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']),
             'video_tracks': lambda x: x is not None and len(x) > 0,
diff --git a/renamer/extractors/filename_extractor.py b/renamer/extractors/filename_extractor.py
index 7f39e58..e488237 100644
--- a/renamer/extractors/filename_extractor.py
+++ b/renamer/extractors/filename_extractor.py
@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from ..constants import SOURCE_DICT, FRAME_CLASSES
+from ..constants import SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT
 
 
 class FilenameExtractor:
@@ -138,4 +138,29 @@ class FilenameExtractor:
         if re.search(r'\bHDR\b', self.file_name, re.IGNORECASE) and not re.search(r'\bNoHDR\b', self.file_name, re.IGNORECASE):
             return 'HDR'
 
+        return None
+
+    def extract_movie_db(self) -> tuple[str, str] | None:
+        """Extract a movie database identifier from the filename.
+
+        Scans for bracketed or braced tags such as [tmdbid-123456],
+        {imdb-tt1234567} or [imdbid-tt1234567].
+
+        Returns:
+            A (database key, identifier) tuple, where the key is one of the
+            MOVIE_DB_DICT keys, or None when no known tag is found.
+        """
+        # Any letters-then-alphanumerics tag matches, so unrelated tags such
+        # as [Remastered] are captured too and must be filtered below.
+        tag_pattern = r'[\[\{]([a-zA-Z]+(?:id)?)[-\s]*([a-zA-Z0-9]+)[\]\}]'
+        matches = re.findall(tag_pattern, self.file_name)
+
+        # Walk from the end of the filename backwards and return the first
+        # tag that names a known database, skipping unrelated trailing tags.
+        for db_type, db_id in reversed(matches):
+            db_type_lower = db_type.lower()
+            for db_key, db_info in MOVIE_DB_DICT.items():
+                if any(db_type_lower.startswith(prefix.rstrip('-')) for prefix in db_info['patterns']):
+                    return (db_key, db_id)
+
         return None
\ No newline at end of file
diff --git a/renamer/formatters/media_formatter.py b/renamer/formatters/media_formatter.py
index 7e16940..ab46eca 100644
--- a/renamer/formatters/media_formatter.py
+++ b/renamer/formatters/media_formatter.py
@@ -301,6 +301,13 @@ class MediaFormatter:
                 or "Not extracted",
                 "display_formatters": [TextFormatter.grey],
             },
+            {
+                "label": "Movie DB",
+                "label_formatters": [TextFormatter.bold],
+                "value": self.extractor.get("movie_db", "Filename")
+                or "Not extracted",
+                "display_formatters": [TextFormatter.grey],
+            }
         ]
 
         return [self._format_data_item(item) for item in data]
diff --git a/renamer/test/filenames.txt b/renamer/test/filenames.txt
index 15cd236..bf75f7d 100644
--- a/renamer/test/filenames.txt
+++ b/renamer/test/filenames.txt
@@ -255,3 +255,9 @@ The Holdovers (2023) [1080p,ukr,eng].mkv
 Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
 Poor Things (2023) [1080p,ukr,eng].mkv
 Anatomy of a Fall (2023) [720p,ukr,eng].mkv
+Star Wars: Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
+
+Грицькові книжки.(1979).[ukr].{imdb-tt9007536}.mpg
+Гуси-Лебеді.(1949).[ukr,2rus].{imdb-tt1070792}.mkv
+Apple 1984 (1984) [Remastered] [2160p,eng] [imdbid-tt4227346].mkv
+Harley Quinn. A Very Problematic Valentine's Day Special (2023) WEB-DL [1080p,ukr,eng] [imdbid-tt22525032].mkv
diff --git a/renamer/test/test_filename_extractor.py b/renamer/test/test_filename_extractor.py
index fdada40..453626e 100644
--- a/renamer/test/test_filename_extractor.py
+++ b/renamer/test/test_filename_extractor.py
@@ -79,4 +79,21 @@ def test_extract_hdr(filename):
     print(f"\nFilename: \033[1;36m{filename}\033[0m")
     print(f"Extracted HDR: \033[1;32m{hdr}\033[0m")
     # HDR should be 'HDR' or None
-    assert hdr is None or hdr == 'HDR'
\ No newline at end of file
+    assert hdr is None or hdr == 'HDR'
+
+
+@pytest.mark.parametrize("filename", load_test_filenames())
+def test_extract_movie_db(filename):
+    """Test movie database identifier extraction from filename"""
+    file_path = Path(filename)
+    extractor = FilenameExtractor(file_path)
+    movie_db = extractor.extract_movie_db()
+    # Print filename and extracted movie DB clearly
+    print(f"\nFilename: \033[1;36m{filename}\033[0m")
+    print(f"Extracted movie DB: \033[1;32m{movie_db}\033[0m")
+    # Movie DB should be tuple (str, str) or None
+    if movie_db:
+        assert isinstance(movie_db, tuple) and len(movie_db) == 2
+        assert isinstance(movie_db[0], str) and isinstance(movie_db[1], str)
+    else:
+        assert movie_db is None
\ No newline at end of file