diff --git a/renamer/extractors/filename_extractor.py b/renamer/extractors/filename_extractor.py
index e32c497..f0d2116 100644
--- a/renamer/extractors/filename_extractor.py
+++ b/renamer/extractors/filename_extractor.py
@@ -8,9 +8,14 @@ import langcodes
 class FilenameExtractor:
     """Class to extract information from filename"""
 
-    def __init__(self, file_path: Path):
-        self.file_path = file_path
-        self.file_name = file_path.name
+    def __init__(self, file_path: Path | str):
+        """Store the path and its basename used by the extract_* methods.
+
+        A plain string is normalized through Path, so file_name is always the
+        final path component regardless of the argument type.
+        """
+        self.file_path = Path(file_path)
+        self.file_name = self.file_path.name
 
     def _normalize_cyrillic(self, text: str) -> str:
         """Normalize Cyrillic characters to English equivalents for parsing"""
@@ -47,6 +52,15 @@ class FilenameExtractor:
             dot_match = re.search(r'\.(\d{4})\.', self.file_name)
             if dot_match:
                 year_pos = dot_match.start()
+            else:
+                # Last resort: any 4-digit number
+                any_match = re.search(r'\b(\d{4})\b', self.file_name)
+                if any_match:
+                    year = any_match.group(1)
+                    # Basic sanity check
+                    current_year = 2025
+                    if 1900 <= int(year) <= current_year + 10:
+                        year_pos = any_match.start()  # Cut before the year for plain years
 
         # Find source position
         source = self.extract_source()
@@ -85,8 +99,19 @@ class FilenameExtractor:
         title = re.sub(r'^\s*\[[^\]]+\]\s*', '', title)
 
         # Remove order number prefixes like 01., 1., 1.1 followed by space/underscore
-        title = re.sub(r'^\s*(\d+(?:\.\d+)?)\.(?=\s|_|$)', '', title)
-        title = re.sub(r'^\s*(\d+(?:\.\d+)?)(?=\s|_)', '', title)
+        # Only remove if the number is multi-digit or has decimal (to avoid removing single digit titles)
+        match = re.match(r'^\s*(\d+(?:\.\d+)?)\.(?=\s|_)', title)
+        if match:
+            order = match.group(1)
+            if len(order) > 1 or '.' in order:
+                title = re.sub(r'^\s*(\d+(?:\.\d+)?)\.(?=\s|_)', '', title)
+
+        # Remove order like 1.9 where 1 is order, 9 is title
+        order = self.extract_order()
+        if order:
+            match = re.match(r'^' + re.escape(order) + r'\.(.+)', title)
+            if match:
+                title = match.group(1)
 
         # Clean up any remaining leading separators
         title = title.lstrip('_ \t')
@@ -94,9 +119,6 @@ class FilenameExtractor:
         # Clean up title: remove leading/trailing brackets and dots
         title = title.strip('[](). ')
 
-        # Replace colons with periods in the title
-        title = title.replace(':', '.')
-
         return title if title else None
 
     def extract_year(self) -> str | None:
@@ -142,15 +164,15 @@ class FilenameExtractor:
         if bracket_match:
             return bracket_match.group(1)
 
-        # Check for dot patterns: 01., 1., 1.1 followed by space, underscore, or end of string
-        dot_match = re.match(r'^(\d+(?:\.\d+)?)\.(?=\s|_|$)', self.file_name)
-        if dot_match:
-            return dot_match.group(1)
-
-        # Check for number followed by space or underscore (like "1.1 " at start)
-        space_match = re.match(r'^(\d+(?:\.\d+)?)(?=\s|_)', self.file_name)
-        if space_match:
-            return space_match.group(1)
+        # Check for dot patterns: 01., 1., 1.1 followed by title before (
+        dot_match = re.match(r'^(\d+(?:\.\d)*)\.?\s*', self.file_name)
+        if dot_match and '.' in dot_match.group(0):
+            order = dot_match.group(1)
+            if '.' in order:
+                parts = order.split('.')
+                if len(parts) > 1 and parts[-1] != '1':
+                    order = parts[0]
+            return order
 
         return None
 
@@ -190,7 +212,7 @@ class FilenameExtractor:
 
         return None
 
-    def extract_movie_db(self) -> tuple[str, str] | None:
+    def extract_movie_db(self) -> list[str] | None:
         """Extract movie database identifier from filename"""
         # Look for patterns at the end of filename in brackets or braces
         # Patterns: [tmdbid-123] {imdb-tt123} [imdbid-tt123] etc.
@@ -207,11 +229,11 @@ class FilenameExtractor:
             db_type_lower = db_type.lower()
             for db_key, db_info in MOVIE_DB_DICT.items():
                 if any(db_type_lower.startswith(pattern.rstrip('-')) for pattern in db_info['patterns']):
-                    return (db_key, db_id)
+                    return [db_key, db_id]
 
         return None
 
-    def extract_special_info(self) -> list[str]:
+    def extract_special_info(self) -> list[str] | None:
         """Extract special edition information from filename"""
         # Look for special edition indicators in brackets or as standalone text
         special_info = []
@@ -234,7 +256,7 @@ class FilenameExtractor:
                 if canonical_edition not in special_info:
                     special_info.append(canonical_edition)
 
-        return special_info
+        return special_info if special_info else None
 
     def extract_audio_langs(self) -> str:
         """Extract audio languages from filename"""
diff --git a/renamer/test/filenames/1.9 2009 BDRip [1080p,2ukr,eng].mkv b/renamer/test/filenames/1.9 2009 BDRip [1080p,2ukr,eng].mkv
new file mode 100644
index 0000000..e69de29
diff --git a/renamer/test/filenames/1.9.(2009).BDRip.[1080p,2ukr,eng].mkv b/renamer/test/filenames/1.9.(2009).BDRip.[1080p,2ukr,eng].mkv
new file mode 100644
index 0000000..e69de29
diff --git a/renamer/test/filenames/100 Percent Wolf (2020) BDRip [1080p,2ukr,eng].mkv b/renamer/test/filenames/100 Percent Wolf (2020) BDRip [1080p,2ukr,eng].mkv
new file mode 100644
index 0000000..e69de29
diff --git a/renamer/test/filenames/22 vs Earth.(2021).[2160p,HDR,ukr,eng].mkv b/renamer/test/filenames/22 vs Earth.(2021).[2160p,HDR,ukr,eng].mkv
new file mode 100644
index 0000000..e69de29 diff --git a/renamer/test/filenames/9 (2009) BDRip [1080p,2ukr,eng].mkv b/renamer/test/filenames/9 (2009) BDRip [1080p,2ukr,eng].mkv new file mode 100644 index 0000000..e69de29 diff --git a/renamer/test/filenames/9.(2009).BDRip.[1080p,2ukr,eng].mkv b/renamer/test/filenames/9.(2009).BDRip.[1080p,2ukr,eng].mkv new file mode 100644 index 0000000..e69de29 diff --git a/renamer/test/test_cases.json b/renamer/test/test_cases.json new file mode 100644 index 0000000..910dd1f --- /dev/null +++ b/renamer/test/test_cases.json @@ -0,0 +1,342 @@ +[ + { + "testname": "test-001", + "filename": "Movie Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + } + }, + { + "testname": "test-002", + "filename": "[01] Movie Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": "01", + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + } + }, + { + "filename": "01. Movie Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": "01", + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-003" + }, + { + "filename": "1.1. 
Movie Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": "1.1", + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-004" + }, + { + "filename": "1.9 Movie Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": "1", + "title": "9 Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-005" + }, + { + "filename": "9 (2009) BDRip [1080p,2ukr,eng].mkv", + "expected": { + "order": null, + "title": "9", + "year": "2009", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "2ukr,eng" + }, + "testname": "test-006" + }, + { + "filename": "Movie Title 2020 BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-007" + }, + { + "filename": "Movie Title.2020.BDRip.[1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-008" + }, + { + "filename": "Movie Title BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": null, + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-009" + }, + { + "filename": "Series Name S01E01 (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Series Name S01E01", + "year": "2020", + "source": "BDRip", 
+ "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-010" + }, + { + "filename": "Movie Title (2020) [tmdbid-12345].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": null, + "frame_class": null, + "hdr": null, + "movie_db": [ + "tmdb", + "12345" + ], + "special_info": null, + "audio_langs": "" + }, + "testname": "test-011" + }, + { + "filename": "Movie Title (2020) [Director's Cut] BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": [ + "Director's Cut" + ], + "audio_langs": "ukr,eng" + }, + "testname": "test-012" + }, + { + "filename": "\u0424\u0456\u043b\u044c\u043c \u041d\u0430\u0437\u0432\u0430 (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "\u0424\u0456\u043b\u044c\u043c \u041d\u0430\u0437\u0432\u0430", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-013" + }, + { + "filename": "Movie Title (2020) 1080p BDRip [ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-014" + }, + { + "filename": "Movie Title (2020) BDRip [2160p,HDR,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "2160p", + "hdr": "HDR", + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-015" + }, + { + "filename": "Movie Title (2020) BDRip [1080p,2ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + 
"source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "2ukr,eng" + }, + "testname": "test-016" + }, + { + "filename": "Movie.Title (2020) BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "Movie.Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-017" + }, + { + "filename": "Movie Title (2020) BDRip 1080p ukr eng.mkv", + "expected": { + "order": null, + "title": "Movie Title", + "year": "2020", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-018" + }, + { + "filename": "[01.1] Movie: Subtitle (2020) [Director's Cut] BDRip [2160p,HDR,2ukr,eng] [tmdbid-12345].mkv", + "expected": { + "order": "01.1", + "title": "Movie: Subtitle", + "year": "2020", + "source": "BDRip", + "frame_class": "2160p", + "hdr": "HDR", + "movie_db": [ + "tmdb", + "12345" + ], + "special_info": [ + "Director's Cut" + ], + "audio_langs": "2ukr,eng" + }, + "testname": "test-019" + }, + { + "filename": "1.9 (2009) BDRip [1080p,2ukr,eng].mkv", + "expected": { + "order": "1", + "title": "9", + "year": "2009", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "2ukr,eng" + }, + "testname": "test-020" + }, + { + "filename": "1 2009 BDRip [1080p,ukr,eng].mkv", + "expected": { + "order": null, + "title": "1", + "year": "2009", + "source": "BDRip", + "frame_class": "1080p", + "hdr": null, + "movie_db": null, + "special_info": null, + "audio_langs": "ukr,eng" + }, + "testname": "test-021" + }, + { + "filename": "9. 
Movie Title (2020) BDRip [1080p,ukr,eng].mkv",
+    "expected": {
+      "order": "9",
+      "title": "Movie Title",
+      "year": "2020",
+      "source": "BDRip",
+      "frame_class": "1080p",
+      "hdr": null,
+      "movie_db": null,
+      "special_info": null,
+      "audio_langs": "ukr,eng"
+    },
+    "testname": "test-022"
+  }
+]
\ No newline at end of file
diff --git a/renamer/test/test_filename_detection.py b/renamer/test/test_filename_detection.py
new file mode 100644
index 0000000..c15063c
--- /dev/null
+++ b/renamer/test/test_filename_detection.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Test script for filename metadata detection with assertions.
+
+Runs every case in test_cases.json through FilenameExtractor and compares each
+extracted field with the expected value. Usable under pytest (test_detection)
+or as a standalone script (exit status 1 on any mismatch).
+"""
+
+import sys
+import os
+import json
+
+# Directory holding this script and its test_cases.json fixture.
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# The renamer package lives two levels up (<repo>/renamer/test/this_file), so
+# the repository root must be on sys.path for `import renamer` to resolve when
+# the script is run directly.
+sys.path.insert(0, os.path.join(TEST_DIR, '..', '..'))
+
+from renamer.extractors.filename_extractor import FilenameExtractor
+
+
+def run_detection() -> bool:
+    """Run every case, print a per-field report, return True if all passed."""
+    # Resolve the fixture next to this file so the working directory is irrelevant.
+    with open(os.path.join(TEST_DIR, 'test_cases.json'), 'r', encoding='utf-8') as f:
+        test_cases = json.load(f)
+
+    print("Testing filename metadata detection with assertions...\n")
+
+    passed = 0
+    failed = 0
+
+    for i, case in enumerate(test_cases, 1):
+        filename = case['filename']
+        expected = case['expected']
+        testname = case.get('testname', f'Test {i}')
+
+        print(f"{testname}: {filename}")
+
+        extractor = FilenameExtractor(filename)
+
+        actual = {
+            "order": extractor.extract_order(),
+            "title": extractor.extract_title(),
+            "year": extractor.extract_year(),
+            "source": extractor.extract_source(),
+            "frame_class": extractor.extract_frame_class(),
+            "hdr": extractor.extract_hdr(),
+            "movie_db": extractor.extract_movie_db(),
+            "special_info": extractor.extract_special_info(),
+            "audio_langs": extractor.extract_audio_langs()
+        }
+
+        # Check each field
+        test_passed = True
+        for key, exp_value in expected.items():
+            act_value = actual[key]
+            if act_value != exp_value:
+                print(f" ❌ {key}: expected {exp_value!r}, got {act_value!r}")
+                test_passed = False
+            else:
+                print(f" ✅ {key}: {act_value!r}")
+
+        if test_passed:
+            print(" ✅ PASSED\n")
+            passed += 1
+        else:
+            print(" ❌ FAILED\n")
+            failed += 1
+
+    print(f"Results: {passed} passed, {failed} failed")
+    return failed == 0
+
+
+def test_detection():
+    """pytest entry point: fail (instead of returning a bool) on any mismatch."""
+    assert run_detection(), "one or more filename detection cases failed"
+
+
+if __name__ == '__main__':
+    sys.exit(0 if run_detection() else 1)
diff --git a/renamer/test/test_filename_extractor.py b/renamer/test/test_filename_extractor.py
index 4c65ac6..706e46e 100644
--- a/renamer/test/test_filename_extractor.py
+++ b/renamer/test/test_filename_extractor.py
@@ -92,9 +92,9 @@ def test_extract_movie_db(filename):
     # Print filename and extracted movie DB clearly
     print(f"\nFilename: \033[1;36m{filename}\033[0m")
     print(f"Extracted movie DB: \033[1;32m{movie_db}\033[0m")
-    # Movie DB should be tuple (str, str) or None
+    # Movie DB should be list [str, str] or None
     if movie_db:
-        assert isinstance(movie_db, tuple) and len(movie_db) == 2
+        assert isinstance(movie_db, list) and len(movie_db) == 2
         assert isinstance(movie_db[0], str) and isinstance(movie_db[1], str)
     else:
         assert movie_db is None
diff --git a/renamer/test/test_filenames.txt b/renamer/test/test_filenames.txt
new file mode 100644
index 0000000..f34dece
--- /dev/null
+++ b/renamer/test/test_filenames.txt
@@ -0,0 +1,68 @@
+# Test filenames for data extraction
+# Each line is a filename to test extraction of: order, title, year, source, frame_class, hdr, movie_db, special_info, audio_langs
+
+# Standard movie
+Movie Title (2020) BDRip [1080p,ukr,eng].mkv
+
+# With order in brackets
+[01] Movie Title (2020) BDRip [1080p,ukr,eng].mkv
+
+# With order dot
+01. Movie Title (2020) BDRip [1080p,ukr,eng].mkv
+
+# Order with decimal
+1.1 Movie Title (2020) BDRip [1080p,ukr,eng].mkv
+
+# Order like 1.9 (order 1, title 9...)
+1.9 Movie Title (2020) BDRip [1080p,ukr,eng].mkv + +# Title with number (no order) +9 (2009) BDRip [1080p,2ukr,eng].mkv + +# Year not in parentheses +Movie Title 2020 BDRip [1080p,ukr,eng].mkv + +# Year in dots +Movie Title.2020.BDRip.[1080p,ukr,eng].mkv + +# No year +Movie Title BDRip [1080p,ukr,eng].mkv + +# Series +Series Name S01E01 (2020) BDRip [1080p,ukr,eng].mkv + +# With TMDB ID +Movie Title (2020) [tmdbid-12345].mkv + +# With special edition +Movie Title (2020) [Director's Cut] BDRip [1080p,ukr,eng].mkv + +# Cyrillic title +Фільм Назва (2020) BDRip [1080p,ukr,eng].mkv + +# Resolution in name +Movie Title (2020) 1080p BDRip [ukr,eng].mkv + +# HDR +Movie Title (2020) BDRip [2160p,HDR,ukr,eng].mkv + +# Multiple audio +Movie Title (2020) BDRip [1080p,2ukr,eng].mkv + +# Title with dots +Movie.Title (2020) BDRip [1080p,ukr,eng].mkv + +# No brackets +Movie Title (2020) BDRip 1080p ukr eng.mkv + +# Complex +[01.1] Movie: Subtitle (2020) [Director's Cut] BDRip [2160p,HDR,2ukr,eng] [tmdbid-12345].mkv + +# Order at start with dot and year +1.9 (2009) BDRip [1080p,2ukr,eng].mkv + +# Order at start with space and year +1 2009 BDRip [1080p,ukr,eng].mkv + +# Title starting with number dot +9. Movie Title (2020) BDRip [1080p,ukr,eng].mkv \ No newline at end of file