Files
Personal-API/app/utils/srt_parser.py
2026-05-05 15:28:08 +07:00

83 lines
2.9 KiB
Python

import re
import os
from typing import List
from app.models.srt_translation import SRTSubtitle
def parse_srt_file(file_path: str) -> List[SRTSubtitle]:
    """
    Parse an SRT file and return a list of subtitle objects.

    The file content is split into blocks separated by blank lines. A block
    is accepted when it has at least three non-empty lines: a numeric index,
    a timestamp line, and one or more text lines. Multi-line text is joined
    into a single string with spaces and stored as ``japanese_text``.

    Parsing is best-effort: blocks that are too short or whose index is not
    an integer are reported and skipped. If the file cannot be read, the
    error and traceback are printed and whatever was parsed so far
    (normally an empty list) is returned; no exception is propagated.

    Args:
        file_path: Path to the SRT file to read (UTF-8, with or without BOM).

    Returns:
        The parsed subtitles, in file order.
    """
    subtitles = []
    try:
        # 'utf-8-sig' strips a leading byte-order mark if present. With plain
        # 'utf-8' the BOM stays glued to the first index line, int() fails on
        # it, and the first subtitle is silently lost.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()
        print(f"🔍 Read file content, length: {len(content)} characters")

        # Split on blank lines, treating whitespace-only lines as blank too.
        # A literal '\n\n' split misses separators such as '\n \n', which
        # would merge two cues into one block.
        blocks = [
            block.strip()
            for block in re.split(r'\n\s*\n', content)
            if block.strip()
        ]
        print(f"🔍 Found {len(blocks)} blocks")

        for block in blocks:
            lines = [line.strip() for line in block.split('\n') if line.strip()]
            if len(lines) < 3:
                print(f"⚠️ Skipping invalid block with {len(lines)} lines: {lines}")
                continue

            try:
                index = int(lines[0])
                timestamp = lines[1]
                # Everything after the timestamp is subtitle text; collapse
                # multi-line text into one space-separated string.
                text = ' '.join(lines[2:])
                subtitle = SRTSubtitle(
                    index=index,
                    timestamp=timestamp,
                    japanese_text=text
                )
                subtitles.append(subtitle)
                print(f"🔍 Parsed subtitle {index}: '{text}'")
            except (ValueError, IndexError) as e:
                print(f"⚠️ Failed to parse block: {lines[0] if lines else 'empty'}, error: {e}")
                continue
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and fall
        # through to return what has been collected.
        print(f"❌ Error reading file: {str(e)}")
        import traceback
        traceback.print_exc()
    print(f"🔍 Successfully parsed {len(subtitles)} subtitles")
    return subtitles
def write_translated_srt(subtitles: List[SRTSubtitle], output_path: str):
    """
    Write translated subtitles back to SRT format.

    Each subtitle is written as its index, its timestamp, the original
    ``japanese_text`` and, when ``english_translation`` is truthy, the
    translation on the following line. Every entry is terminated by a blank
    line. The output is encoded as UTF-8.

    Args:
        subtitles: Subtitles to write, in the order they should appear.
        output_path: Destination file path. Missing parent directories are
            created; a bare filename (no directory part) is written to the
            current working directory.

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is printed with its traceback and then re-raised.
    """
    try:
        # os.path.dirname() returns '' for a bare filename, and
        # os.makedirs('') raises FileNotFoundError, so only create the
        # directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as file:
            for subtitle in subtitles:
                file.write(f"{subtitle.index}\n")
                file.write(f"{subtitle.timestamp}\n")
                # The original text is always written; the translation is
                # appended beneath it only when one exists.
                file.write(f"{subtitle.japanese_text}\n")
                if subtitle.english_translation:
                    file.write(f"{subtitle.english_translation}\n")
                file.write("\n")
        print(f"✅ Successfully wrote {len(subtitles)} subtitles to: {output_path}")
    except Exception as e:
        print(f"❌ Error writing SRT file: {str(e)}")
        import traceback
        traceback.print_exc()
        raise