feat: Add movie database identifier extraction and update related tests

This commit is contained in:
sHa
2025-12-26 14:34:58 +00:00
parent 38e3f0e553
commit c5ab985440
6 changed files with 84 additions and 2 deletions

View File

@@ -81,3 +81,30 @@ FRAME_CLASSES = {
"description": "Ultra High Definition (UHD) - 4320p 8K",
},
}
# Registry of external movie/TV databases whose identifiers may be embedded
# in a filename. Each entry carries display metadata plus the tag spellings
# ("patterns") under which the database may appear, e.g. "imdbid-" as in
# "[imdbid-tt1234567]". Insertion order is the order consumers iterate in.
MOVIE_DB_DICT = {
    # The Movie Database
    "tmdb": {
        "name": "The Movie Database (TMDb)",
        "description": "Community built movie and TV database",
        "url": "https://www.themoviedb.org/",
        "patterns": ["tmdbid", "tmdb", "tmdbid-", "tmdb-"],
    },
    # Internet Movie Database
    "imdb": {
        "name": "Internet Movie Database (IMDb)",
        "description": "Comprehensive movie, TV, and celebrity database",
        "url": "https://www.imdb.com/",
        "patterns": ["imdbid", "imdb", "imdbid-", "imdb-"],
    },
    # Trakt.tv
    "trakt": {
        "name": "Trakt.tv",
        "description": "Service that integrates with media centers for scrobbling",
        "url": "https://trakt.tv/",
        "patterns": ["traktid", "trakt", "traktid-", "trakt-"],
    },
    # TheTVDB
    "tvdb": {
        "name": "The TV Database (TVDB)",
        "description": "Community driven TV database",
        "url": "https://thetvdb.com/",
        "patterns": ["tvdbid", "tvdb", "tvdbid-", "tvdb-"],
    },
}

View File

@@ -41,6 +41,9 @@ class MediaExtractor:
('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()),
('Filename', lambda: self.filename_extractor.extract_hdr())
],
'movie_db': [
('Filename', lambda: self.filename_extractor.extract_movie_db())
],
'audio_langs': [
('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs())
],
@@ -82,6 +85,7 @@ class MediaExtractor:
'resolution': lambda x: x is not None,
'aspect_ratio': lambda x: x is not None,
'hdr': lambda x: x is not None,
'movie_db': lambda x: x is not None,
'audio_langs': lambda x: x is not None,
'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']),
'video_tracks': lambda x: x is not None and len(x) > 0,

View File

@@ -1,6 +1,6 @@
import re
from pathlib import Path
from ..constants import SOURCE_DICT, FRAME_CLASSES
from ..constants import SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT
class FilenameExtractor:
@@ -138,4 +138,25 @@ class FilenameExtractor:
if re.search(r'\bHDR\b', self.file_name, re.IGNORECASE) and not re.search(r'\bNoHDR\b', self.file_name, re.IGNORECASE):
return 'HDR'
return None
def extract_movie_db(self) -> tuple[str, str] | None:
"""Extract movie database identifier from filename"""
# Look for patterns at the end of filename in brackets or braces
# Patterns: [tmdbid-123] {imdb-tt123} [imdbid-tt123] etc.
# Match patterns like [tmdbid-123456] or {imdb-tt1234567}
pattern = r'[\[\{]([a-zA-Z]+(?:id)?)[-\s]*([a-zA-Z0-9]+)[\]\}]'
matches = re.findall(pattern, self.file_name)
if matches:
# Take the last match (closest to end of filename)
db_type, db_id = matches[-1]
# Normalize database type
db_type_lower = db_type.lower()
for db_key, db_info in MOVIE_DB_DICT.items():
if any(db_type_lower.startswith(pattern.rstrip('-')) for pattern in db_info['patterns']):
return (db_key, db_id)
return None

View File

@@ -301,6 +301,13 @@ class MediaFormatter:
or "Not extracted",
"display_formatters": [TextFormatter.grey],
},
{
"label": "Movie DB",
"label_formatters": [TextFormatter.bold],
"value": self.extractor.get("movie_db", "Filename")
or "Not extracted",
"display_formatters": [TextFormatter.grey],
}
]
return [self._format_data_item(item) for item in data]

View File

@@ -255,3 +255,9 @@ The Holdovers (2023) [1080p,ukr,eng].mkv
Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
Poor Things (2023) [1080p,ukr,eng].mkv
Anatomy of a Fall (2023) [720p,ukr,eng].mkv
Star Wars: Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
Грицькові книжки.(1979).[ukr].{imdb-tt9007536}.mpg
Гуси-Лебеді.(1949).[ukr,2rus].{imdb-tt1070792}.mkv
Apple 1984 (1984) [Remastered] [2160p,eng] [imdbid-tt4227346].mkv
Harley Quinn. A Very Problematic Valentine's Day Special (2023) WEB-DL [1080p,ukr,eng] [imdbid-tt22525032].mkv

View File

@@ -79,4 +79,21 @@ def test_extract_hdr(filename):
print(f"\nFilename: \033[1;36m{filename}\033[0m")
print(f"Extracted HDR: \033[1;32m{hdr}\033[0m")
# HDR should be 'HDR' or None
assert hdr is None or hdr == 'HDR'
assert hdr is None or hdr == 'HDR'
@pytest.mark.parametrize("filename", load_test_filenames())
def test_extract_movie_db(filename):
    """Check the shape of the movie DB identifier extracted from each sample filename.

    The extractor must return either ``None`` or a 2-tuple of strings;
    the concrete values are printed for manual inspection, not asserted.
    """
    result = FilenameExtractor(Path(filename)).extract_movie_db()

    # Show the input and the extraction result (visible with ``pytest -s``)
    print(f"\nFilename: \033[1;36m{filename}\033[0m")
    print(f"Extracted movie DB: \033[1;32m{result}\033[0m")

    # Nothing extracted: the only acceptable "empty" value is None
    if not result:
        assert result is None
        return

    # Something extracted: it must be a (str, str) pair
    assert isinstance(result, tuple) and len(result) == 2
    assert isinstance(result[0], str) and isinstance(result[1], str)