feat: Improve title extraction logic and add comprehensive test cases for edge cases
This commit is contained in:
@@ -19,15 +19,65 @@ class FilenameExtractor:
|
|||||||
|
|
||||||
def extract_title(self) -> str | None:
|
def extract_title(self) -> str | None:
|
||||||
"""Extract movie title from filename"""
|
"""Extract movie title from filename"""
|
||||||
temp_name = re.sub(r'\s*\(\d{4}\)\s*|\s*\d{4}\s*|\.\d{4}\.', '', self.file_name)
|
# Find positions of year, source, and quality brackets
|
||||||
|
year_pos = -1
|
||||||
|
source_pos = -1
|
||||||
|
quality_pos = -1
|
||||||
|
paren_match = None
|
||||||
|
dot_match = None
|
||||||
|
|
||||||
# Find and remove source
|
# Find year position (either (YYYY) or .YYYY.)
|
||||||
|
paren_match = re.search(r'\((\d{4})\)', self.file_name)
|
||||||
|
if paren_match:
|
||||||
|
year_pos = paren_match.start()
|
||||||
|
else:
|
||||||
|
dot_match = re.search(r'\.(\d{4})\.', self.file_name)
|
||||||
|
if dot_match:
|
||||||
|
year_pos = dot_match.start()
|
||||||
|
|
||||||
|
# Find source position
|
||||||
source = self.extract_source()
|
source = self.extract_source()
|
||||||
if source:
|
if source:
|
||||||
for alias in SOURCE_DICT[source]:
|
for alias in SOURCE_DICT[source]:
|
||||||
temp_name = re.sub(r'\b' + re.escape(alias) + r'\b', '', temp_name, flags=re.IGNORECASE)
|
match = re.search(r'\b' + re.escape(alias) + r'\b', self.file_name, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
source_pos = match.start()
|
||||||
|
break
|
||||||
|
|
||||||
return temp_name.rsplit('.', 1)[0].strip()
|
# Find quality bracket position (like [720p,ukr,eng])
|
||||||
|
quality_match = re.search(r'\[[^\]]*(?:720p|1080p|2160p|480p|SD|HD|HDR)[^\]]*\]', self.file_name)
|
||||||
|
if quality_match:
|
||||||
|
quality_pos = quality_match.start()
|
||||||
|
|
||||||
|
# Find the earliest position that's not at the beginning
|
||||||
|
positions = [pos for pos in [year_pos, source_pos, quality_pos] if pos > 0]
|
||||||
|
cut_pos = min(positions) if positions else -1
|
||||||
|
|
||||||
|
# Extract title (everything before the cut position)
|
||||||
|
if cut_pos > 0:
|
||||||
|
title = self.file_name[:cut_pos].strip()
|
||||||
|
else:
|
||||||
|
# No delimiters found after position 0, take everything before the last dot
|
||||||
|
title = self.file_name.rsplit('.', 1)[0].strip()
|
||||||
|
|
||||||
|
# If year is at the beginning, remove it
|
||||||
|
if year_pos == 0:
|
||||||
|
if paren_match and paren_match.start() == 0:
|
||||||
|
title = re.sub(r'^\(\d{4}\)\s*', '', title)
|
||||||
|
elif dot_match and dot_match.start() == 0:
|
||||||
|
title = re.sub(r'^\.\d{4}\.\s*', '', title)
|
||||||
|
|
||||||
|
# Remove common prefixes that are not part of the title
|
||||||
|
# Remove bracketed prefixes like [01.1], [1], etc.
|
||||||
|
title = re.sub(r'^\s*\[[^\]]+\]\s*', '', title)
|
||||||
|
|
||||||
|
# Clean up title: remove leading/trailing brackets and dots
|
||||||
|
title = title.strip('[](). ')
|
||||||
|
|
||||||
|
# Replace colons with periods in the title
|
||||||
|
title = title.replace(':', '.')
|
||||||
|
|
||||||
|
return title if title else None
|
||||||
|
|
||||||
def extract_year(self) -> str | None:
|
def extract_year(self) -> str | None:
|
||||||
"""Extract year from filename"""
|
"""Extract year from filename"""
|
||||||
|
|||||||
@@ -210,3 +210,48 @@ The long title (2008) UHD 1440 ENG.mp4
|
|||||||
The long title (2008) 8K 4320p ENG.mp4
|
The long title (2008) 8K 4320p ENG.mp4
|
||||||
Troll 2 (2025) WEB-DL 2160p HDR Ukr Nor [Hurtom].mkv
|
Troll 2 (2025) WEB-DL 2160p HDR Ukr Nor [Hurtom].mkv
|
||||||
Moana 2 (2024) MA WEB-DL 2160p SDR Ukr Eng [Hurtom].mkv
|
Moana 2 (2024) MA WEB-DL 2160p SDR Ukr Eng [Hurtom].mkv
|
||||||
|
|
||||||
|
# Test cases for title extraction with various edge cases
|
||||||
|
2001 A Space Odyssey (1968) [720p,ukr,eng].mkv
|
||||||
|
The 100 (2014) Season 1 Episode 1 [720p,ukr].mkv
|
||||||
|
[2024] Dune Part Two (2024) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Star Wars Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
|
||||||
|
The Lord of the Rings 2001 Extended Edition (2001) BDRip 1080p [ukr,eng].mkv
|
||||||
|
Matrix 1999 (1999) [720p,ukr].mkv
|
||||||
|
(2023) Talk to Me [720p,ukr,eng].mkv
|
||||||
|
Avatar The Way of Water (2022) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Guardians of the Galaxy Vol. 3 (2023) [1080p,ukr,eng].mkv
|
||||||
|
Spider-Man No Way Home (2021) [2160p,HDR,ukr,eng].mkv
|
||||||
|
The Batman (2022) [1080p,ukr,eng].mkv
|
||||||
|
Oppenheimer (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Barbie (2023) [1080p,ukr,eng].mkv
|
||||||
|
Wonka (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Aquaman and the Lost Kingdom (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Migration (2023) [1080p,ukr,eng].mkv
|
||||||
|
The Holdovers (2023) [1080p,ukr,eng].mkv
|
||||||
|
Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Poor Things (2023) [1080p,ukr,eng].mkv
|
||||||
|
Anatomy of a Fall (2023) [720p,ukr,eng].mkv
|
||||||
|
|
||||||
|
|
||||||
|
# Test cases for title extraction with various edge cases
|
||||||
|
2001 A Space Odyssey (1968) [720p,ukr,eng].mkv
|
||||||
|
The 100 (2014) Season 1 Episode 1 [720p,ukr].mkv
|
||||||
|
[2024] Dune Part Two (2024) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Star Wars Episode IV - A New Hope (1977) [1080p,ukr,eng].mkv
|
||||||
|
The Lord of the Rings 2001 Extended Edition (2001) BDRip 1080p [ukr,eng].mkv
|
||||||
|
Matrix 1999 (1999) [720p,ukr].mkv
|
||||||
|
(2023) Talk to Me [720p,ukr,eng].mkv
|
||||||
|
Avatar The Way of Water (2022) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Guardians of the Galaxy Vol. 3 (2023) [1080p,ukr,eng].mkv
|
||||||
|
Spider-Man No Way Home (2021) [2160p,HDR,ukr,eng].mkv
|
||||||
|
The Batman (2022) [1080p,ukr,eng].mkv
|
||||||
|
Oppenheimer (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Barbie (2023) [1080p,ukr,eng].mkv
|
||||||
|
Wonka (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Aquaman and the Lost Kingdom (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Migration (2023) [1080p,ukr,eng].mkv
|
||||||
|
The Holdovers (2023) [1080p,ukr,eng].mkv
|
||||||
|
Killers of the Flower Moon (2023) [2160p,HDR,ukr,eng].mkv
|
||||||
|
Poor Things (2023) [1080p,ukr,eng].mkv
|
||||||
|
Anatomy of a Fall (2023) [720p,ukr,eng].mkv
|
||||||
|
|||||||
Reference in New Issue
Block a user