feat: add voice transcription
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from app.models.voice import VoiceRequest, VoiceResponse
|
from app.models.voice import VoiceRequest, VoiceResponse, TranscriptionRequest, TranscriptionResponse
|
||||||
from app.services.voice import generate_voice
|
from app.services.voice import generate_voice, generate_transcription
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -8,3 +8,9 @@ router = APIRouter()
|
|||||||
async def voice(request: VoiceRequest):
    """Generate voice output for the text carried in the request body.

    Awaits ``generate_voice`` with ``request.text`` and wraps the result in a
    ``VoiceResponse``.
    """
    generated = await generate_voice(request.text)
    # VoiceResponse declares its field as ``voice_output``; passing ``voice=``
    # leaves that required field unset and the response fails validation.
    return VoiceResponse(voice_output=generated)
|
||||||
|
|
||||||
|
@router.post("/transcription", response_model=TranscriptionResponse)
async def transcription(request: TranscriptionRequest):
    """Transcribe the audio file referenced in the request body.

    Awaits ``generate_transcription`` with ``request.audio_file_path`` and
    wraps the result in a ``TranscriptionResponse``.
    """
    # NOTE(review): ``request.target_language`` is not forwarded because
    # ``generate_transcription`` only accepts the file path — confirm intent.
    transcript = await generate_transcription(request.audio_file_path)
    # TranscriptionResponse declares its field as ``text``; passing
    # ``transcription=`` leaves that required field unset.
    return TranscriptionResponse(text=transcript)
|
||||||
|
|
||||||
|
|||||||
@@ -20,8 +20,36 @@ async def generate_voice(messages: list):
|
|||||||
try:
|
try:
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model=OPENAI_AUDIO_MODEL,
|
model=OPENAI_AUDIO_MODEL,
|
||||||
response_format="mp3",
|
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
max_tokens=1000,
|
||||||
|
temperature=0.7,
|
||||||
|
stream=False
|
||||||
|
)
|
||||||
|
|
||||||
|
if not response.choices or not response.choices[0].message.content:
|
||||||
|
return "No response content from the model"
|
||||||
|
|
||||||
|
return response.choices[0].message.content
|
||||||
|
|
||||||
|
except OpenAIError as e:
|
||||||
|
error_msg = f"OpenAI API Error: {str(e)}"
|
||||||
|
print(error_msg)
|
||||||
|
raise Exception(error_msg) from e
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Unexpected error: {str(e)}"
|
||||||
|
print(error_msg)
|
||||||
|
raise Exception(error_msg) from e
|
||||||
|
|
||||||
|
async def generate_transcription(audio_file_path: str) -> str:
|
||||||
|
if not audio_file_path:
|
||||||
|
raise ValueError("Audio file path cannot be empty")
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = client.audio.transcriptions.create(
|
||||||
|
model=OPENAI_AUDIO_MODEL,
|
||||||
|
file=audio_file_path,
|
||||||
|
response_format="text",
|
||||||
|
language="id"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not response.choices or not response.choices[0].message.content:
|
if not response.choices or not response.choices[0].message.content:
|
||||||
@@ -6,4 +6,4 @@ app = FastAPI()
|
|||||||
|
|
||||||
# Include your routes
|
# Include your routes
|
||||||
app.include_router(translate.router, prefix="/api/v1/translate", tags=["translate"])
|
app.include_router(translate.router, prefix="/api/v1/translate", tags=["translate"])
|
||||||
app.include_router(voice.router, prefix="/api/v1/voice", tags=["voice"])
|
app.include_router(voice.router, prefix="/api/v1/voice", tags=["voice"])
|
||||||
|
|||||||
@@ -1,8 +1,17 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# Text-to-Speech Models
|
||||||
class VoiceRequest(BaseModel):
    # Request body for the text-to-speech endpoint.
    text: str  # text passed on to voice generation
|
||||||
|
|
||||||
class VoiceResponse(BaseModel):
    # Response body for the text-to-speech endpoint.
    voice_output: str  # generated voice result, carried as a string
|
||||||
|
|
||||||
|
# Speech-to-Text Models
|
||||||
|
class TranscriptionRequest(BaseModel):
    # Request body for the speech-to-text endpoint.
    audio_file_path: str  # path of the audio file to transcribe
    # Language code for the transcription; the default "id" is the
    # ISO 639-1 code for Indonesian (not English).
    # NOTE(review): confirm this field is actually forwarded to the
    # transcription call.
    target_language: Optional[str] = "id"
|
||||||
|
|
||||||
|
class TranscriptionResponse(BaseModel):
    # Response body for the speech-to-text endpoint.
    text: str  # transcribed text
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
from app.core.openai_voice_client import generate_voice
|
from app.core.openai_voice_transcription_client import generate_voice, generate_transcription
|
||||||
|
|
||||||
async def generate_voice(text: str) -> str:
    """Delegate voice generation for ``text`` to the OpenAI client module.

    Args:
        text: Input forwarded unchanged to the client's ``generate_voice``.

    Returns:
        Whatever the client coroutine returns.
    """
    # Import under an alias: this wrapper shares its name with the client
    # function, so once this ``def`` executes the module-level name refers to
    # the wrapper itself, and calling ``generate_voice`` here would recurse
    # forever instead of reaching the client.
    from app.core.openai_voice_transcription_client import (
        generate_voice as _client_generate_voice,
    )

    # NOTE(review): the client signature shown in this change takes
    # ``messages: list`` — confirm a plain string is acceptable.
    return await _client_generate_voice(text)
|
||||||
|
|
||||||
|
async def generate_transcription(audio_file_path: str) -> str:
    """Delegate transcription of ``audio_file_path`` to the OpenAI client module.

    Args:
        audio_file_path: Path forwarded unchanged to the client's
            ``generate_transcription``.

    Returns:
        Whatever the client coroutine returns.
    """
    # Import under an alias: this wrapper shares its name with the client
    # function, so once this ``def`` executes the module-level name refers to
    # the wrapper itself, and calling ``generate_transcription`` here would
    # recurse forever instead of reaching the client.
    from app.core.openai_voice_transcription_client import (
        generate_transcription as _client_generate_transcription,
    )

    return await _client_generate_transcription(audio_file_path)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user