feat: add voice transcription

This commit is contained in:
bladeclara42
2025-07-03 10:02:12 +07:00
parent f047a3c1c2
commit 0fd8170c5b
5 changed files with 53 additions and 7 deletions

View File

@@ -1,6 +1,6 @@
from fastapi import APIRouter
from app.models.voice import VoiceRequest, VoiceResponse
from app.services.voice import generate_voice
from app.models.voice import VoiceRequest, VoiceResponse, TranscriptionRequest, TranscriptionResponse
from app.services.voice import generate_voice, generate_transcription
router = APIRouter()
@@ -8,3 +8,9 @@ router = APIRouter()
async def voice(request: VoiceRequest):
    """Pass the request text to ``generate_voice`` and wrap the result.

    The awaited result is returned as the ``voice`` field of a
    ``VoiceResponse``.
    """
    # NOTE(review): the VoiceResponse model shown in this change also lists a
    # `voice_output` field — confirm `voice=` is still the keyword to use.
    return VoiceResponse(voice=await generate_voice(request.text))
@router.post("/transcription", response_model=TranscriptionResponse)
async def transcription(request: TranscriptionRequest):
    """Transcribe the audio file named in the request.

    Awaits ``generate_transcription`` with ``request.audio_file_path`` and
    returns the result in a ``TranscriptionResponse``.
    """
    # NOTE(review): request.target_language is not forwarded; the service
    # function only accepts the file path.
    transcript = await generate_transcription(request.audio_file_path)
    # TranscriptionResponse declares one required field, `text`. Passing the
    # value as `transcription=` left `text` unset, which fails validation.
    return TranscriptionResponse(text=transcript)

View File

@@ -20,8 +20,36 @@ async def generate_voice(messages: list):
try:
response = client.chat.completions.create(
model=OPENAI_AUDIO_MODEL,
response_format="mp3",
messages=messages,
max_tokens=1000,
temperature=0.7,
stream=False
)
if not response.choices or not response.choices[0].message.content:
return "No response content from the model"
return response.choices[0].message.content
except OpenAIError as e:
error_msg = f"OpenAI API Error: {str(e)}"
print(error_msg)
raise Exception(error_msg) from e
except Exception as e:
error_msg = f"Unexpected error: {str(e)}"
print(error_msg)
raise Exception(error_msg) from e
async def generate_transcription(audio_file_path: str) -> str:
if not audio_file_path:
raise ValueError("Audio file path cannot be empty")
try:
response = client.audio.transcriptions.create(
model=OPENAI_AUDIO_MODEL,
file=audio_file_path,
response_format="text",
language="id"
)
if not response.choices or not response.choices[0].message.content:

View File

@@ -1,8 +1,17 @@
from pydantic import BaseModel
from typing import Optional
# Text-to-Speech Models
class VoiceRequest(BaseModel):
    """Request body for the voice endpoint; carries the input text."""

    text: str
class VoiceResponse(BaseModel):
    """Response body for the voice endpoint."""

    # NOTE(review): both fields are listed as required here. This looks like a
    # rename (`voice` -> `voice_output`) rendered as two lines — confirm which
    # field callers are expected to populate.
    voice: str
    voice_output: str
# Speech-to-Text Models
class TranscriptionRequest(BaseModel):
    """Request body for the transcription endpoint."""

    audio_file_path: str
    # Defaults to "id", the ISO 639-1 code for Indonesian (not English).
    target_language: Optional[str] = "id"
class TranscriptionResponse(BaseModel):
    """Response body for the transcription endpoint; carries the result text."""

    text: str

View File

@@ -1,7 +1,10 @@
from app.core.openai_voice_client import generate_voice
from app.core.openai_voice_transcription_client import generate_voice, generate_transcription
async def generate_voice(text: str) -> str:
    """Delegate voice generation to the core client module.

    Args:
        text: Forwarded unchanged to the client-level ``generate_voice``.

    Returns:
        Whatever the client-level ``generate_voice`` coroutine returns.
    """
    # This wrapper shares its name with the function it imports, so a bare
    # `generate_voice(...)` call in here resolves to this coroutine itself and
    # recurses without end. Import the client function under an alias instead.
    # NOTE(review): the client function shown in this change is declared as
    # `generate_voice(messages: list)` — confirm a plain string is accepted.
    from app.core.openai_voice_transcription_client import (
        generate_voice as _client_generate_voice,
    )

    return await _client_generate_voice(text)
async def generate_transcription(audio_file_path: str) -> str:
    """Delegate transcription to the core client module.

    Args:
        audio_file_path: Forwarded unchanged to the client-level
            ``generate_transcription``.

    Returns:
        Whatever the client-level ``generate_transcription`` coroutine returns.
    """
    # This wrapper shares its name with the function it imports, so a bare
    # `generate_transcription(...)` call in here resolves to this coroutine
    # itself and recurses without end. Import the client function under an
    # alias instead.
    from app.core.openai_voice_transcription_client import (
        generate_transcription as _client_generate_transcription,
    )

    return await _client_generate_transcription(audio_file_path)