import asyncio
import os
import re

from app.core.deepseek_client import chat_with_openai
from app.models.lyric_romanji_translator import FileResult

# Upper bound on how many .lrc files are processed at the same time.
semaphore = asyncio.Semaphore(5)

# Matches a leading LRC time tag of the form [mm:ss.xx].
# NOTE(review): only two-digit fractions match; tags such as [00:12.345] are
# not recognised as time tags -- confirm whether such files must be supported.
timestamp_pattern = re.compile(r"^\[\d{2}:\d{2}\.\d{2}\]")


def needs_romaji(lines, idx):
    """Return True when the line at ``idx`` has no romaji line right after it.

    The line directly following ``idx`` counts as existing romaji only when it
    contains text and does not start with a time tag. End of file, a blank
    line, or another time-tagged line all mean romaji is still missing.

    Args:
        lines: All lines of the file, as returned by ``readlines()``.
        idx: Index of the time-tagged lyric line being examined.

    Returns:
        bool: True if a romaji line should be inserted after ``lines[idx]``.
    """
    if idx + 1 >= len(lines):
        return True
    following = lines[idx + 1]
    # A blank separator line is not romaji; previously it suppressed the
    # insertion for the lyric line above it.
    if not following.strip():
        return True
    return bool(timestamp_pattern.match(following))


async def get_romaji(text: str) -> str:
    """Ask the chat backend to convert ``text`` to romaji.

    Args:
        text: The lyric text to convert.

    Returns:
        Whatever ``chat_with_openai`` returns for the request (expected to be
        the romaji string).
    """
    messages = [
        {
            "role": "system",
            "content": "Convert Japanese text into romaji only. Output romaji without explanation.",
        },
        {"role": "user", "content": text},
    ]
    return await chat_with_openai(messages)


def _lyric_text(line):
    """Return the text of an LRC line with all leading time tags removed."""
    text = line.strip()
    tag = timestamp_pattern.match(text)
    # Lines may carry several tags ("[00:01.00][00:30.00]text"); drop them all
    # so that only the lyric itself is sent for conversion.
    while tag:
        text = text[tag.end():].lstrip()
        tag = timestamp_pattern.match(text)
    return text


async def process_lrc_file(filepath: str) -> FileResult:
    """Insert a romaji line after each time-tagged lyric line that lacks one.

    The file is read as UTF-8 and rewritten in place only when at least one
    romaji line was added.

    Args:
        filepath: Path of the .lrc file to process.

    Returns:
        FileResult: The path, whether the file was modified, and the number
        of romaji lines added.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()

    added_lines = 0
    new_lines = []
    for idx, line in enumerate(lines):
        new_lines.append(line)
        if not (timestamp_pattern.match(line) and needs_romaji(lines, idx)):
            continue

        japanese = _lyric_text(line)
        if not japanese:
            # Time tag without lyric text (instrumental gap): nothing to convert.
            continue

        reply = await get_romaji(japanese)
        # Collapse the reply onto one line so it occupies exactly one LRC
        # line, and skip empty replies instead of writing a junk line.
        romaji = " ".join(reply.split()) if reply else ""
        if not romaji:
            continue

        if not line.endswith("\n"):
            # Final line of a file without a trailing newline: terminate it
            # so the romaji is not glued onto the lyric line.
            new_lines[-1] = line + "\n"
        new_lines.append(f"{romaji}\n")
        added_lines += 1

    if added_lines > 0:
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(new_lines)

    return FileResult(file=filepath, processed=added_lines > 0, added_lines=added_lines)


async def safe_process(filepath):
    """Process one file while holding a slot of the module-level semaphore.

    Args:
        filepath: Path of the .lrc file to process.

    Returns:
        The ``FileResult`` produced by ``process_lrc_file``.
    """
    async with semaphore:
        print(f"Processing: {filepath}")
        return await process_lrc_file(filepath)


async def translate_lyric_romanji(folder_path: str):
    """Add romaji lines to every .lrc file found under ``folder_path``.

    The folder is walked recursively and one task is scheduled per file;
    ``safe_process`` bounds how many run at once.

    Args:
        folder_path: Root folder to search for ``.lrc`` files.

    Returns:
        dict: ``{"results": [...], "status": ...}`` where ``results`` holds
        one ``FileResult`` per file and ``status`` is ``"completed"``,
        ``"no .lrc files found"`` or an ``"error: folder not found ..."``
        message.
    """
    if not os.path.exists(folder_path):
        return {"results": [], "status": f"error: folder not found {folder_path}"}

    tasks = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".lrc"):
                filepath = os.path.join(root, file)
                tasks.append(asyncio.create_task(safe_process(filepath)))

    if not tasks:
        return {"results": [], "status": "no .lrc files found"}

    # Run them all concurrently
    results = await asyncio.gather(*tasks)

    return {"results": results, "status": "completed"}