feat(srt translation): add api for srt translation
This commit is contained in:
83
app/utils/srt_parser.py
Normal file
83
app/utils/srt_parser.py
Normal file
@@ -0,0 +1,83 @@
|
||||
|
||||
import re
|
||||
import os
|
||||
from typing import List
|
||||
from app.models.srt_translation import SRTSubtitle
|
||||
|
||||
def parse_srt_file(file_path: str) -> List[SRTSubtitle]:
    """
    Parse an SRT file and return its subtitles in file order.

    The file is split into blocks separated by blank lines. A block is
    accepted when it has at least three non-empty lines: a numeric index,
    a timestamp line (stored verbatim, not validated), and one or more
    text lines. Text lines are joined with a single space and stored in
    ``japanese_text``.

    Parsing is best-effort: malformed blocks are reported and skipped, and
    any error while reading the file is printed (with traceback) rather
    than raised, in which case the subtitles collected so far (possibly
    none) are returned.

    Args:
        file_path: Path of the SRT file to read (UTF-8, with or without BOM).

    Returns:
        The successfully parsed subtitles.
    """
    subtitles = []

    try:
        # 'utf-8-sig' reads plain UTF-8 as well, but strips a leading BOM.
        # With plain 'utf-8' the BOM stays glued to the first index line and
        # int() rejects it, silently dropping the first subtitle.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()

        print(f"🔍 Read file content, length: {len(content)} characters")

        # Split on blank lines, tolerating separator lines that contain only
        # whitespace (a literal '\n\n' split would merge such blocks).
        blocks = [block.strip() for block in re.split(r'\n\s*\n', content) if block.strip()]
        print(f"🔍 Found {len(blocks)} blocks")

        for block in blocks:
            lines = [line.strip() for line in block.split('\n') if line.strip()]

            # Need at least index + timestamp + one line of text.
            if len(lines) < 3:
                print(f"⚠️ Skipping invalid block with {len(lines)} lines: {lines}")
                continue

            try:
                index = int(lines[0])
                timestamp = lines[1]

                # Multi-line cue text is flattened into a single line.
                text = ' '.join(lines[2:])

                subtitles.append(
                    SRTSubtitle(
                        index=index,
                        timestamp=timestamp,
                        japanese_text=text,
                    )
                )

                print(f"🔍 Parsed subtitle {index}: '{text}'")

            except (ValueError, IndexError) as e:
                # A bad block must not abort the rest of the file.
                print(f"⚠️ Failed to parse block: {lines[0] if lines else 'empty'}, error: {e}")
                continue

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and fall
        # through to return whatever was parsed before it.
        print(f"❌ Error reading file: {str(e)}")
        import traceback
        traceback.print_exc()

    print(f"🔍 Successfully parsed {len(subtitles)} subtitles")
    return subtitles
|
||||
|
||||
def write_translated_srt(subtitles: List[SRTSubtitle], output_path: str):
    """
    Write subtitles to ``output_path`` in SRT format.

    Each subtitle is written as its index, its timestamp, the original
    ``japanese_text`` and, when ``english_translation`` is truthy, the
    translation on the following line. Every entry is terminated by a
    blank line. The file is written as UTF-8 and overwritten if it exists.

    Args:
        subtitles: Subtitles to write, in output order.
        output_path: Destination file path. Missing parent directories are
            created; a bare file name writes into the current directory.

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is printed (with traceback) and then re-raised.
    """
    try:
        # os.path.dirname() returns '' for a bare file name, and
        # os.makedirs('') raises FileNotFoundError, so only create the
        # directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as file:
            for subtitle in subtitles:
                file.write(f"{subtitle.index}\n")
                file.write(f"{subtitle.timestamp}\n")
                # The original text is always kept; the translation, when
                # present, goes on the line below it.
                file.write(f"{subtitle.japanese_text}\n")
                if subtitle.english_translation:
                    file.write(f"{subtitle.english_translation}\n")
                file.write("\n")

        print(f"✅ Successfully wrote {len(subtitles)} subtitles to: {output_path}")

    except Exception as e:
        print(f"❌ Error writing SRT file: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
|
||||
Reference in New Issue
Block a user