feat: Add movie database identifier extraction and update related tests

2025-12-26 14:34:58 +00:00
parent 38e3f0e553
commit c5ab985440
6 changed files with 84 additions and 2 deletions
--- a/renamer/constants.py
+++ b/renamer/constants.py
@@ -81,3 +81,30 @@ FRAME_CLASSES = {
        "description": "Ultra High Definition (UHD) - 4320p 8K",
    },
 }
+
+MOVIE_DB_DICT = {  # Movie/TV database services, keyed by the short id returned by FilenameExtractor.extract_movie_db
+    "tmdb": {
+        "name": "The Movie Database (TMDb)",
+        "description": "Community built movie and TV database",
+        "url": "https://www.themoviedb.org/",
+        "patterns": ["tmdbid", "tmdb", "tmdbid-", "tmdb-"],  # filename tag prefixes, e.g. [tmdbid-123]
+    },
+    "imdb": {
+        "name": "Internet Movie Database (IMDb)",
+        "description": "Comprehensive movie, TV, and celebrity database",
+        "url": "https://www.imdb.com/",
+        "patterns": ["imdbid", "imdb", "imdbid-", "imdb-"],  # filename tag prefixes, e.g. {imdb-tt123}
+    },
+    "trakt": {
+        "name": "Trakt.tv",
+        "description": "Service that integrates with media centers for scrobbling",
+        "url": "https://trakt.tv/",
+        "patterns": ["traktid", "trakt", "traktid-", "trakt-"],  # filename tag prefixes
+    },
+    "tvdb": {
+        "name": "The TV Database (TVDB)",
+        "description": "Community driven TV database",
+        "url": "https://thetvdb.com/",
+        "patterns": ["tvdbid", "tvdb", "tvdbid-", "tvdb-"],  # filename tag prefixes
+    },
+}
--- a/renamer/extractor.py
+++ b/renamer/extractor.py
@@ -41,6 +41,9 @@ class MediaExtractor:
                ('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()),
                ('Filename', lambda: self.filename_extractor.extract_hdr())
            ],
+            'movie_db': [
+                ('Filename', lambda: self.filename_extractor.extract_movie_db())
+            ],
            'audio_langs': [
                ('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs())
            ],
@@ -82,6 +85,7 @@ class MediaExtractor:
            'resolution': lambda x: x is not None,
            'aspect_ratio': lambda x: x is not None,
            'hdr': lambda x: x is not None,
+            'movie_db': lambda x: x is not None,
            'audio_langs': lambda x: x is not None,
            'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']),
            'video_tracks': lambda x: x is not None and len(x) > 0,
--- a/renamer/extractors/filename_extractor.py
+++ b/renamer/extractors/filename_extractor.py
@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from ..constants import SOURCE_DICT, FRAME_CLASSES
+from ..constants import SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT


 class FilenameExtractor:
@@ -138,4 +138,25 @@ class FilenameExtractor:
        if re.search(r'\bHDR\b', self.file_name, re.IGNORECASE) and not re.search(r'\bNoHDR\b', self.file_name, re.IGNORECASE):
            return 'HDR'
        
+        return None
+
+    def extract_movie_db(self) -> tuple[str, str] | None:
+        """Extract movie database identifier from filename"""
+        # Look for patterns at the end of filename in brackets or braces
+        # Patterns: [tmdbid-123] {imdb-tt123} [imdbid-tt123] etc.
+        
+        # Match patterns like [tmdbid-123456] or {imdb-tt1234567}
+        pattern = r'[\[\{]([a-zA-Z]+(?:id)?)[-\s]*([a-zA-Z0-9]+)[\]\}]'
+        matches = re.findall(pattern, self.file_name)
+        
+        if matches:
+            # Take the last match (closest to end of filename)
+            db_type, db_id = matches[-1]
+            
+            # Normalize database type
+            db_type_lower = db_type.lower()
+            for db_key, db_info in MOVIE_DB_DICT.items():
+                if any(db_type_lower.startswith(pattern.rstrip('-')) for pattern in db_info['patterns']):
+                    return (db_key, db_id)
+        
        return None
--- a/renamer/formatters/media_formatter.py
+++ b/renamer/formatters/media_formatter.py
@@ -301,6 +301,13 @@ class MediaFormatter:
                or "Not extracted",
                "display_formatters": [TextFormatter.grey],
            },
+            {
+                "label": "Movie DB",
+                "label_formatters": [TextFormatter.bold],
+                "value": self.extractor.get("movie_db", "Filename")
+                or "Not extracted",
+                "display_formatters": [TextFormatter.grey],
+            }
        ]

        return [self._format_data_item(item) for item in data]
--- a/renamer/test/filenames.txt
+++ b/renamer/test/filenames.txt
@@ -255,3 +255,9 @@ The Holdovers (2023) [1080p,ukr,eng].mkv
 Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
 Poor Things (2023) [1080p,ukr,eng].mkv
 Anatomy of a Fall (2023) [720p,ukr,eng].mkv
+Star Wars: Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
+
+Грицькові книжки.(1979).[ukr].{imdb-tt9007536}.mpg
+Гуси-Лебеді.(1949).[ukr,2rus].{imdb-tt1070792}.mkv
+Apple 1984 (1984) [Remastered] [2160p,eng] [imdbid-tt4227346].mkv
+Harley Quinn. A Very Problematic Valentine's Day Special (2023) WEB-DL [1080p,ukr,eng] [imdbid-tt22525032].mkv
--- a/renamer/test/test_filename_extractor.py
+++ b/renamer/test/test_filename_extractor.py
@@ -79,4 +79,21 @@ def test_extract_hdr(filename):
    print(f"\nFilename: \033[1;36m{filename}\033[0m")
    print(f"Extracted HDR: \033[1;32m{hdr}\033[0m")
    # HDR should be 'HDR' or None
-    assert hdr is None or hdr == 'HDR'
+    assert hdr is None or hdr == 'HDR'
+
+
+@pytest.mark.parametrize("filename", load_test_filenames())
+def test_extract_movie_db(filename):
+    """Test movie database identifier extraction from filename"""
+    file_path = Path(filename)
+    extractor = FilenameExtractor(file_path)
+    movie_db = extractor.extract_movie_db()
+    # Print filename and extracted movie DB clearly
+    print(f"\nFilename: \033[1;36m{filename}\033[0m")
+    print(f"Extracted movie DB: \033[1;32m{movie_db}\033[0m")
+    # Movie DB should be tuple (str, str) or None
+    if movie_db:
+        assert isinstance(movie_db, tuple) and len(movie_db) == 2
+        assert isinstance(movie_db[0], str) and isinstance(movie_db[1], str)
+    else:
+        assert movie_db is None