import os
import re
import traceback
from typing import List

from app.models.srt_translation import SRTSubtitle


def parse_srt_file(file_path: str) -> List[SRTSubtitle]:
    """
    Parse an SRT file and return the list of subtitle objects.

    Parsing is best-effort: malformed blocks are reported on stdout and
    skipped, and any error while reading the file is printed (with a
    traceback) rather than raised, so the caller receives whatever was
    parsed up to that point (possibly an empty list).

    Args:
        file_path: Path of the SRT file to read. The file is decoded as
            UTF-8; a leading byte-order mark, if present, is discarded.

    Returns:
        One SRTSubtitle per well-formed block, in file order. The text lines
        of a block are joined with a single space and stored in
        ``japanese_text``.
    """
    subtitles = []

    try:
        # 'utf-8-sig' strips an optional BOM. With plain 'utf-8' the BOM stays
        # glued to the first index line, int() fails on it, and the first
        # subtitle is silently dropped.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()

        print(f"🔍 Read file content, length: {len(content)} characters")

        # Blocks are separated by blank lines. Splitting on a regex (instead
        # of the literal '\n\n') also treats lines holding only spaces/tabs
        # as separators, so two cues are never merged into one.
        blocks = [
            block.strip()
            for block in re.split(r'\n\s*\n', content)
            if block.strip()
        ]
        print(f"🔍 Found {len(blocks)} blocks")

        for block in blocks:
            lines = [line.strip() for line in block.split('\n') if line.strip()]

            # A valid block needs an index line, a timestamp line and at
            # least one line of text.
            if len(lines) < 3:
                print(f"⚠️ Skipping invalid block with {len(lines)} lines: {lines}")
                continue

            try:
                index = int(lines[0])
                timestamp = lines[1]
                # Multi-line subtitle text is flattened to a single line.
                text = ' '.join(lines[2:])

                subtitles.append(
                    SRTSubtitle(
                        index=index,
                        timestamp=timestamp,
                        japanese_text=text,
                    )
                )
                print(f"🔍 Parsed subtitle {index}: '{text}'")
            except (ValueError, IndexError) as e:
                print(f"⚠️ Failed to parse block: {lines[0]}, error: {e}")
                continue

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall
        # through to return whatever has been parsed so far.
        print(f"❌ Error reading file: {str(e)}")
        traceback.print_exc()

    print(f"🔍 Successfully parsed {len(subtitles)} subtitles")
    return subtitles


def write_translated_srt(subtitles: List[SRTSubtitle], output_path: str):
    """
    Write translated subtitles back to disk in SRT format.

    Each subtitle is written as its index, its timestamp, the original
    ``japanese_text`` and, when ``english_translation`` is truthy, the
    translation on the following line. Blocks are separated by a blank line.

    Args:
        subtitles: Subtitles to write, in output order.
        output_path: Destination file path. Missing parent directories are
            created; a bare filename (no directory part) is written to the
            current working directory.

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is printed with a traceback and then re-raised.
    """
    try:
        # os.path.dirname() returns '' for a bare filename and
        # os.makedirs('') raises FileNotFoundError, so only create the
        # directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as file:
            for subtitle in subtitles:
                file.write(f"{subtitle.index}\n")
                file.write(f"{subtitle.timestamp}\n")
                # The original text is always written; the translation is
                # appended below it only when one exists.
                file.write(f"{subtitle.japanese_text}\n")
                if subtitle.english_translation:
                    file.write(f"{subtitle.english_translation}\n")
                file.write("\n")

        print(f"✅ Successfully wrote {len(subtitles)} subtitles to: {output_path}")

    except Exception as e:
        print(f"❌ Error writing SRT file: {str(e)}")
        traceback.print_exc()
        raise