feat: Add movie database identifier extraction and update related tests
This commit is contained in:
@@ -81,3 +81,30 @@ FRAME_CLASSES = {
|
|||||||
"description": "Ultra High Definition (UHD) - 4320p 8K",
|
"description": "Ultra High Definition (UHD) - 4320p 8K",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Registry of online movie/TV databases, keyed by a short canonical id.
# Every entry has the same four fields:
#   name        - human-readable name of the service
#   description - one-line summary of the service
#   url         - home page of the service
#   patterns    - tag spellings associated with the service, listed both
#                 with and without a trailing "-" separator
MOVIE_DB_DICT = {
    "tmdb": {
        "name": "The Movie Database (TMDb)",
        "description": "Community built movie and TV database",
        "url": "https://www.themoviedb.org/",
        "patterns": ["tmdbid", "tmdb", "tmdbid-", "tmdb-"],
    },
    "imdb": {
        "name": "Internet Movie Database (IMDb)",
        "description": "Comprehensive movie, TV, and celebrity database",
        "url": "https://www.imdb.com/",
        "patterns": ["imdbid", "imdb", "imdbid-", "imdb-"],
    },
    "trakt": {
        "name": "Trakt.tv",
        "description": "Service that integrates with media centers for scrobbling",
        "url": "https://trakt.tv/",
        "patterns": ["traktid", "trakt", "traktid-", "trakt-"],
    },
    "tvdb": {
        "name": "The TV Database (TVDB)",
        "description": "Community driven TV database",
        "url": "https://thetvdb.com/",
        "patterns": ["tvdbid", "tvdb", "tvdbid-", "tvdb-"],
    },
}
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ class MediaExtractor:
|
|||||||
('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()),
|
('MediaInfo', lambda: self.mediainfo_extractor.extract_hdr()),
|
||||||
('Filename', lambda: self.filename_extractor.extract_hdr())
|
('Filename', lambda: self.filename_extractor.extract_hdr())
|
||||||
],
|
],
|
||||||
|
'movie_db': [
|
||||||
|
('Filename', lambda: self.filename_extractor.extract_movie_db())
|
||||||
|
],
|
||||||
'audio_langs': [
|
'audio_langs': [
|
||||||
('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs())
|
('MediaInfo', lambda: self.mediainfo_extractor.extract_audio_langs())
|
||||||
],
|
],
|
||||||
@@ -82,6 +85,7 @@ class MediaExtractor:
|
|||||||
'resolution': lambda x: x is not None,
|
'resolution': lambda x: x is not None,
|
||||||
'aspect_ratio': lambda x: x is not None,
|
'aspect_ratio': lambda x: x is not None,
|
||||||
'hdr': lambda x: x is not None,
|
'hdr': lambda x: x is not None,
|
||||||
|
'movie_db': lambda x: x is not None,
|
||||||
'audio_langs': lambda x: x is not None,
|
'audio_langs': lambda x: x is not None,
|
||||||
'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']),
|
'tracks': lambda x: x is not None and any(x.get(k, []) for k in ['video_tracks', 'audio_tracks', 'subtitle_tracks']),
|
||||||
'video_tracks': lambda x: x is not None and len(x) > 0,
|
'video_tracks': lambda x: x is not None and len(x) > 0,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from ..constants import SOURCE_DICT, FRAME_CLASSES
|
from ..constants import SOURCE_DICT, FRAME_CLASSES, MOVIE_DB_DICT
|
||||||
|
|
||||||
|
|
||||||
class FilenameExtractor:
|
class FilenameExtractor:
|
||||||
@@ -139,3 +139,24 @@ class FilenameExtractor:
|
|||||||
return 'HDR'
|
return 'HDR'
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def extract_movie_db(self) -> tuple[str, str] | None:
|
||||||
|
"""Extract movie database identifier from filename"""
|
||||||
|
# Look for patterns at the end of filename in brackets or braces
|
||||||
|
# Patterns: [tmdbid-123] {imdb-tt123} [imdbid-tt123] etc.
|
||||||
|
|
||||||
|
# Match patterns like [tmdbid-123456] or {imdb-tt1234567}
|
||||||
|
pattern = r'[\[\{]([a-zA-Z]+(?:id)?)[-\s]*([a-zA-Z0-9]+)[\]\}]'
|
||||||
|
matches = re.findall(pattern, self.file_name)
|
||||||
|
|
||||||
|
if matches:
|
||||||
|
# Take the last match (closest to end of filename)
|
||||||
|
db_type, db_id = matches[-1]
|
||||||
|
|
||||||
|
# Normalize database type
|
||||||
|
db_type_lower = db_type.lower()
|
||||||
|
for db_key, db_info in MOVIE_DB_DICT.items():
|
||||||
|
if any(db_type_lower.startswith(pattern.rstrip('-')) for pattern in db_info['patterns']):
|
||||||
|
return (db_key, db_id)
|
||||||
|
|
||||||
|
return None
|
||||||
@@ -301,6 +301,13 @@ class MediaFormatter:
|
|||||||
or "Not extracted",
|
or "Not extracted",
|
||||||
"display_formatters": [TextFormatter.grey],
|
"display_formatters": [TextFormatter.grey],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"label": "Movie DB",
|
||||||
|
"label_formatters": [TextFormatter.bold],
|
||||||
|
"value": self.extractor.get("movie_db", "Filename")
|
||||||
|
or "Not extracted",
|
||||||
|
"display_formatters": [TextFormatter.grey],
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
return [self._format_data_item(item) for item in data]
|
return [self._format_data_item(item) for item in data]
|
||||||
|
|||||||
@@ -255,3 +255,9 @@ The Holdovers (2023) [1080p,ukr,eng].mkv
|
|||||||
Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
|
Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
Poor Things (2023) [1080p,ukr,eng].mkv
|
Poor Things (2023) [1080p,ukr,eng].mkv
|
||||||
Anatomy of a Fall (2023) [720p,ukr,eng].mkv
|
Anatomy of a Fall (2023) [720p,ukr,eng].mkv
|
||||||
|
Star Wars: Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
|
||||||
|
|
||||||
|
Грицькові книжки.(1979).[ukr].{imdb-tt9007536}.mpg
|
||||||
|
Гуси-Лебеді.(1949).[ukr,2rus].{imdb-tt1070792}.mkv
|
||||||
|
Apple 1984 (1984) [Remastered] [2160p,eng] [imdbid-tt4227346].mkv
|
||||||
|
Harley Quinn. A Very Problematic Valentine's Day Special (2023) WEB-DL [1080p,ukr,eng] [imdbid-tt22525032].mkv
|
||||||
|
|||||||
@@ -80,3 +80,20 @@ def test_extract_hdr(filename):
|
|||||||
print(f"Extracted HDR: \033[1;32m{hdr}\033[0m")
|
print(f"Extracted HDR: \033[1;32m{hdr}\033[0m")
|
||||||
# HDR should be 'HDR' or None
|
# HDR should be 'HDR' or None
|
||||||
assert hdr is None or hdr == 'HDR'
|
assert hdr is None or hdr == 'HDR'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("filename", load_test_filenames())
def test_extract_movie_db(filename):
    """Check the shape of the movie DB identifier extracted from a filename.

    The result must be either ``None`` or a 2-tuple of strings; the specific
    database and id are only printed for inspection, not asserted.
    """
    movie_db = FilenameExtractor(Path(filename)).extract_movie_db()

    # Show the input and the result so a run with output enabled is readable
    print(f"\nFilename: \033[1;36m{filename}\033[0m")
    print(f"Extracted movie DB: \033[1;32m{movie_db}\033[0m")

    # A falsy result is only acceptable when it is exactly None
    if not movie_db:
        assert movie_db is None
        return

    # Otherwise it must be a (str, str) pair
    assert isinstance(movie_db, tuple) and len(movie_db) == 2
    assert isinstance(movie_db[0], str) and isinstance(movie_db[1], str)
|
||||||
Reference in New Issue
Block a user