83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
|
|
import re
|
|
import os
|
|
from typing import List
|
|
from app.models.srt_translation import SRTSubtitle
|
|
|
|
def parse_srt_file(file_path: str) -> List[SRTSubtitle]:
    """
    Parse an SRT file and return its cues as a list of subtitle objects.

    The file is decoded as UTF-8; a leading byte-order mark, if present, is
    ignored. Cues are separated by blank lines, where a line containing only
    whitespace also counts as blank. Each cue needs at least three non-empty
    lines: an integer index, a timestamp line (stored verbatim, not
    validated) and one or more text lines, which are joined with a single
    space.

    Args:
        file_path: Path of the SRT file to read.

    Returns:
        The parsed subtitles in file order. Cues that are too short or whose
        index is not an integer are reported and skipped. Any other error
        (for example an unreadable file) is printed with its traceback and
        NOT re-raised; whatever was parsed up to that point (possibly an
        empty list) is returned.
    """
    subtitles = []

    try:
        # 'utf-8-sig' reads plain UTF-8 as well, but additionally drops a
        # leading BOM. With plain 'utf-8' the BOM stays glued to the first
        # index ("\ufeff1"), int() rejects it and the first cue is lost.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()

        print(f"🔍 Read file content, length: {len(content)} characters")

        # Split on blank lines. The separator may contain stray spaces or
        # tabs; a literal '\n\n' split would miss those and merge two cues.
        blocks = [
            block.strip()
            for block in re.split(r'\n\s*\n', content)
            if block.strip()
        ]
        print(f"🔍 Found {len(blocks)} blocks")

        for block in blocks:
            lines = [line.strip() for line in block.split('\n') if line.strip()]

            if len(lines) < 3:
                print(f"⚠️ Skipping invalid block with {len(lines)} lines: {lines}")
                continue

            try:
                index = int(lines[0])
                timestamp = lines[1]

                # Everything after the timestamp is cue text; multi-line
                # cues are flattened into one space-separated string.
                text = ' '.join(lines[2:])

                subtitle = SRTSubtitle(
                    index=index,
                    timestamp=timestamp,
                    japanese_text=text
                )
                subtitles.append(subtitle)

                print(f"🔍 Parsed subtitle {index}: '{text}'")

            except (ValueError, IndexError) as e:
                print(f"⚠️ Failed to parse block: {lines[0] if lines else 'empty'}, error: {e}")
                continue

    except Exception as e:
        # Deliberate best-effort: report the problem and fall through to
        # return whatever has been collected so far.
        print(f"❌ Error reading file: {str(e)}")
        import traceback
        traceback.print_exc()

    print(f"🔍 Successfully parsed {len(subtitles)} subtitles")
    return subtitles
|
|
|
|
def write_translated_srt(subtitles: List[SRTSubtitle], output_path: str):
    """
    Write subtitles to ``output_path`` in SRT format.

    For every subtitle the index, the timestamp and the original
    (``japanese_text``) line are written; when ``english_translation`` is
    truthy it is written on the following line. Each cue is terminated by a
    blank line. The file is encoded as UTF-8 and any missing parent
    directories are created first.

    Args:
        subtitles: Subtitles to write, in output order.
        output_path: Destination file. May be a bare filename, in which case
            the file is created in the current working directory.

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is printed with its traceback and then re-raised.
    """
    try:
        # Create the parent directory if it doesn't exist. dirname() is ''
        # for a bare filename and os.makedirs('') raises FileNotFoundError,
        # so only create a directory when there is one to create.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as file:
            for subtitle in subtitles:
                file.write(f"{subtitle.index}\n")
                file.write(f"{subtitle.timestamp}\n")
                # The original text is always kept; the translation, when
                # available, goes on the line right below it.
                file.write(f"{subtitle.japanese_text}\n")
                if subtitle.english_translation:
                    file.write(f"{subtitle.english_translation}\n")
                file.write("\n")

        print(f"✅ Successfully wrote {len(subtitles)} subtitles to: {output_path}")

    except Exception as e:
        print(f"❌ Error writing SRT file: {str(e)}")
        import traceback
        traceback.print_exc()
        raise