68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
import os
|
|
import re
|
|
import asyncio
|
|
from app.core.deepseek_client import chat_with_openai
|
|
from app.models.lyric_romanji_translator import FileResult
|
|
|
|
# Caps how many .lrc files are processed at the same time; it is acquired
# around each per-file job in safe_process.
semaphore = asyncio.Semaphore(5)

# Matches a leading LRC time tag such as "[01:23.45]". The fractional part may
# be two digits (hundredths of a second) or three digits (milliseconds), since
# both spellings occur in LRC files.
timestamp_pattern = re.compile(r"^\[\d{2}:\d{2}\.\d{2,3}\]")
|
|
|
|
def needs_romaji(lines, idx):
    """Decide whether the lyric line at ``lines[idx]`` still lacks a romaji line.

    A romaji line is recognised purely by position: it is the non-blank line
    directly after a lyric line that does not itself start with a time tag.

    Args:
        lines: All lines of the .lrc file, in file order.
        idx: Index of the time-tagged lyric line being examined.

    Returns:
        ``False`` when the next line exists, is not blank and does not start
        with a time tag (it is taken to be the existing romaji line);
        ``True`` otherwise, including when ``lines[idx]`` is the last line.
    """
    following = idx + 1
    if following >= len(lines):
        # Last line of the file: nothing follows it yet.
        return True

    next_line = lines[following]
    if not next_line.strip():
        # A blank separator or trailing empty line is not a romaji line, so
        # the lyric above it still needs one.
        return True

    # Another time-tagged line follows directly, so no romaji sits in between.
    return bool(timestamp_pattern.match(next_line))
|
|
|
|
async def get_romaji(text: str) -> str:
    """Ask the chat model for the romaji reading of ``text``.

    Sends a two-message conversation (a fixed system instruction followed by
    ``text`` as the user message) through ``chat_with_openai`` and returns
    whatever that call yields.
    """
    system_prompt = {
        "role": "system",
        "content": "Convert Japanese text into romaji only. Output romaji without explanation.",
    }
    user_prompt = {"role": "user", "content": text}
    reply = await chat_with_openai([system_prompt, user_prompt])
    return reply
|
|
|
|
async def process_lrc_file(filepath: str) -> FileResult:
    """Insert a romaji line below every lyric line of one .lrc file.

    The file is read as UTF-8. For each line that starts with a time tag and
    for which ``needs_romaji`` reports no existing romaji line, the lyric text
    is sent to ``get_romaji`` and the answer is inserted as its own line
    directly below. The file is rewritten in place only when at least one
    line was added.

    Args:
        filepath: Path of the .lrc file to update.

    Returns:
        A ``FileResult`` built from the path, whether any line was added, and
        the number of romaji lines added.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()

    added_lines = 0
    new_lines = []
    for idx, line in enumerate(lines):
        new_lines.append(line)

        if not (timestamp_pattern.match(line) and needs_romaji(lines, idx)):
            continue

        # Peel off every leading time tag, not just the first one, so lines
        # that repeat the tag ("[..][..]text") send only the lyric text.
        japanese = line.strip()
        tag = timestamp_pattern.match(japanese)
        while tag:
            japanese = japanese[tag.end():].lstrip()
            tag = timestamp_pattern.match(japanese)
        if not japanese:
            # Time tag with no lyric text: nothing to convert.
            continue

        romaji = ((await get_romaji(japanese)) or "").strip()
        if not romaji:
            # No usable answer; skip instead of writing an empty or "None"
            # line into the lyrics file.
            continue

        if not line.endswith("\n"):
            # Last line of a file without a trailing newline: terminate it so
            # the romaji lands on its own line instead of being glued on.
            new_lines[-1] = line + "\n"
        new_lines.append(f"{romaji}\n")
        added_lines += 1

    if added_lines > 0:
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(new_lines)

    return FileResult(file=filepath, processed=added_lines > 0, added_lines=added_lines)
|
|
|
|
async def safe_process(filepath):
    """Run ``process_lrc_file`` on ``filepath`` while holding the shared semaphore.

    Prints a progress line once the semaphore has been acquired and hands
    back the result of ``process_lrc_file`` unchanged.
    """
    async with semaphore:
        print(f"Processing: {filepath}")
        outcome = await process_lrc_file(filepath)
    return outcome
|
|
|
|
async def translate_lyric_romanji(folder_path: str):
    """Add romaji lines to every .lrc file found under ``folder_path``.

    The folder is walked recursively and one ``safe_process`` task is started
    per file whose name ends in ".lrc" (compared case-insensitively). All
    tasks are then awaited together.

    Args:
        folder_path: Directory to search for .lrc files.

    Returns:
        A dict with two keys:
        ``"results"`` - list of the per-file values returned by
        ``safe_process`` (empty when nothing was processed);
        ``"status"`` - ``"completed"``, ``"no .lrc files found"``, or
        ``"error: folder not found <path>"`` when ``folder_path`` is not an
        existing directory.
    """
    # isdir rather than exists: a path that points at a regular file is not a
    # folder that can be walked.
    if not os.path.isdir(folder_path):
        return {"results": [], "status": f"error: folder not found {folder_path}"}

    tasks = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            # Compare case-insensitively so "SONG.LRC" is picked up as well.
            if file.lower().endswith(".lrc"):
                filepath = os.path.join(root, file)
                tasks.append(asyncio.create_task(safe_process(filepath)))

    if not tasks:
        return {"results": [], "status": "no .lrc files found"}

    # Wait for every per-file task; results keep the order the tasks were created in.
    results = await asyncio.gather(*tasks)

    return {"results": results, "status": "completed"}