| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- /**
- * Voice transcription helper using internal Speech-to-Text service
- *
- * Frontend implementation guide:
- * 1. Capture audio using MediaRecorder API
- * 2. Upload audio to storage (e.g., S3) to get URL
- * 3. Call transcription with the URL
- *
- * Example usage:
- * ```tsx
- * // Frontend component
- * const transcribeMutation = trpc.voice.transcribe.useMutation({
- * onSuccess: (data) => {
- * console.log(data.text); // Full transcription
- * console.log(data.language); // Detected language
- * console.log(data.segments); // Timestamped segments
- * }
- * });
- *
- * // After uploading audio to storage
- * transcribeMutation.mutate({
- * audioUrl: uploadedAudioUrl,
- * language: 'en', // optional
- * prompt: 'Transcribe the meeting' // optional
- * });
- * ```
- */
- import { ENV } from "./env";
/**
 * Input accepted by `transcribeAudio`.
 */
export type TranscribeOptions = {
  /** URL to the audio file (e.g., S3 URL). */
  audioUrl: string;
  /** Optional: language code (e.g., "en", "es", "zh"). */
  language?: string;
  /** Optional: custom prompt for the transcription. */
  prompt?: string;
};
/**
 * One segment of a transcription, in the native Whisper API segment format.
 */
export type WhisperSegment = {
  id: number;
  seek: number;
  /** Segment boundaries as reported by the service. */
  start: number;
  end: number;
  /** Transcribed text of this segment. */
  text: string;
  tokens: Array<number>;
  temperature: number;
  avg_logprob: number;
  compression_ratio: number;
  no_speech_prob: number;
};

/**
 * Full transcription payload, in the native Whisper API response format.
 */
export type WhisperResponse = {
  task: "transcribe";
  language: string;
  duration: number;
  /** Complete transcription text. */
  text: string;
  segments: Array<WhisperSegment>;
};

/**
 * Successful result of `transcribeAudio`: the native Whisper API response,
 * returned as-is.
 */
export type TranscriptionResponse = WhisperResponse;
/**
 * Failure result of `transcribeAudio`.
 *
 * Distinguishable from a successful response by the presence of the `error`
 * property.
 */
export type TranscriptionError = {
  /** Short, human-readable summary of what went wrong. */
  error: string;
  /** Machine-readable failure category. */
  code:
    | "FILE_TOO_LARGE"
    | "INVALID_FORMAT"
    | "TRANSCRIPTION_FAILED"
    | "UPLOAD_FAILED"
    | "SERVICE_ERROR";
  /** Optional extra diagnostic text (e.g. HTTP status or exception message). */
  details?: string;
};
/**
 * Transcribe a remotely hosted audio file using the internal Speech-to-Text service.
 *
 * The audio is downloaded from `options.audioUrl`, checked against the 16MB
 * size limit, and forwarded as a multipart upload to the service's
 * `v1/audio/transcriptions` endpoint with `verbose_json` output requested.
 *
 * This function does not throw: every failure (missing configuration, download
 * problems, oversized file, service error, malformed response) is reported as a
 * `TranscriptionError` value, so callers should check for the `error` property.
 *
 * @param options - URL of the audio file, plus an optional language hint and prompt
 * @returns The service's response on success, otherwise a `TranscriptionError`
 */
export async function transcribeAudio(
  options: TranscribeOptions
): Promise<TranscriptionResponse | TranscriptionError> {
  const MAX_AUDIO_SIZE_MB = 16;
  const BYTES_PER_MB = 1024 * 1024;

  // Single place to build the "too large" error so both size checks agree.
  const fileTooLarge = (sizeMB: number): TranscriptionError => ({
    error: "Audio file exceeds maximum size limit",
    code: "FILE_TOO_LARGE",
    details: `File size is ${sizeMB.toFixed(2)}MB, maximum allowed is ${MAX_AUDIO_SIZE_MB}MB`,
  });

  try {
    // Step 1: Validate environment configuration
    if (!ENV.forgeApiUrl) {
      return {
        error: "Voice transcription service is not configured",
        code: "SERVICE_ERROR",
        details: "BUILT_IN_FORGE_API_URL is not set",
      };
    }
    if (!ENV.forgeApiKey) {
      return {
        error: "Voice transcription service authentication is missing",
        code: "SERVICE_ERROR",
        details: "BUILT_IN_FORGE_API_KEY is not set",
      };
    }

    // Step 2: Download audio from URL
    // NOTE(review): audioUrl is fetched server-side as given. If it can come from
    // untrusted clients, consider restricting it to the expected storage host(s)
    // to avoid server-side request forgery.
    let audioBuffer: Buffer;
    let mimeType: string;
    try {
      const response = await fetch(options.audioUrl);
      if (!response.ok) {
        // NOTE(review): INVALID_FORMAT is kept for backward compatibility even
        // though this is a download failure rather than a format problem.
        return {
          error: "Failed to download audio file",
          code: "INVALID_FORMAT",
          details: `HTTP ${response.status}: ${response.statusText}`,
        };
      }

      // Reject early when the server already declares an oversized body, so we
      // do not buffer a huge file in memory just to discard it.
      const declaredBytes = Number(response.headers.get("content-length") ?? NaN);
      if (Number.isFinite(declaredBytes) && declaredBytes > MAX_AUDIO_SIZE_MB * BYTES_PER_MB) {
        // Release the connection without reading the body; ignore cancel errors.
        await response.body?.cancel().catch(() => undefined);
        return fileTooLarge(declaredBytes / BYTES_PER_MB);
      }

      audioBuffer = Buffer.from(await response.arrayBuffer());
      mimeType = response.headers.get("content-type") || "audio/mpeg";

      // Content-Length may be absent or understate the real size, so the
      // downloaded size is still checked.
      const sizeMB = audioBuffer.length / BYTES_PER_MB;
      if (sizeMB > MAX_AUDIO_SIZE_MB) {
        return fileTooLarge(sizeMB);
      }
    } catch (error) {
      return {
        error: "Failed to fetch audio file",
        code: "SERVICE_ERROR",
        details: error instanceof Error ? error.message : "Unknown error",
      };
    }

    // Step 3: Create FormData for multipart upload to Whisper API
    const formData = new FormData();

    // Create a Blob from the buffer and append it with a filename whose
    // extension is derived from the MIME type.
    const filename = `audio.${getFileExtension(mimeType)}`;
    const audioBlob = new Blob([new Uint8Array(audioBuffer)], { type: mimeType });
    formData.append("file", audioBlob, filename);

    formData.append("model", "whisper-1");
    formData.append("response_format", "verbose_json");

    // Add prompt - use custom prompt if provided, otherwise generate one based
    // on the language hint.
    // NOTE(review): options.language only influences the prompt; it is not sent
    // as a separate form field. Confirm whether the service accepts one.
    const prompt =
      options.prompt ||
      (options.language
        ? `Transcribe the user's voice to text, the user's working language is ${getLanguageName(options.language)}`
        : "Transcribe the user's voice to text");
    formData.append("prompt", prompt);

    // Step 4: Call the transcription service
    // A trailing slash on the base makes URL resolution append the endpoint
    // path instead of replacing the base's last path segment.
    const baseUrl = ENV.forgeApiUrl.endsWith("/")
      ? ENV.forgeApiUrl
      : `${ENV.forgeApiUrl}/`;
    const fullUrl = new URL("v1/audio/transcriptions", baseUrl).toString();

    const response = await fetch(fullUrl, {
      method: "POST",
      headers: {
        authorization: `Bearer ${ENV.forgeApiKey}`,
        "Accept-Encoding": "identity",
      },
      body: formData,
    });
    if (!response.ok) {
      // Best effort: include the response body in the details when readable.
      const errorText = await response.text().catch(() => "");
      return {
        error: "Transcription service request failed",
        code: "TRANSCRIPTION_FAILED",
        details: `${response.status} ${response.statusText}${errorText ? `: ${errorText}` : ""}`,
      };
    }

    // Step 5: Parse and return the transcription result
    const whisperResponse = (await response.json()) as WhisperResponse | null;

    // Validate response structure. A null body, a missing/non-string `text`,
    // and (as before) an empty `text` are all reported as an invalid response.
    if (!whisperResponse?.text || typeof whisperResponse.text !== "string") {
      return {
        error: "Invalid transcription response",
        code: "SERVICE_ERROR",
        details: "Transcription service returned an invalid response format",
      };
    }
    return whisperResponse; // Return native Whisper API response directly
  } catch (error) {
    // Handle unexpected errors (network failure on the service call, JSON
    // parse errors, ...)
    return {
      error: "Voice transcription failed",
      code: "SERVICE_ERROR",
      details: error instanceof Error ? error.message : "An unexpected error occurred",
    };
  }
}
/**
 * Helper function to get a file extension from an audio MIME type.
 *
 * The lookup ignores letter case and any parameters after `;`, so a
 * Content-Type such as `audio/webm;codecs=opus` resolves to `webm`.
 *
 * @param mimeType - MIME type string, typically a Content-Type header value
 * @returns The matching extension without a dot, or `"audio"` when the type is unknown
 */
function getFileExtension(mimeType: string): string {
  // A Map (rather than a plain object) so inherited keys such as
  // "constructor" can never be returned as an "extension".
  const mimeToExt = new Map<string, string>([
    ["audio/webm", "webm"],
    ["audio/mp3", "mp3"],
    ["audio/mpeg", "mp3"],
    ["audio/wav", "wav"],
    ["audio/wave", "wav"],
    ["audio/x-wav", "wav"],
    ["audio/ogg", "ogg"],
    ["audio/m4a", "m4a"],
    ["audio/x-m4a", "m4a"],
    ["audio/mp4", "m4a"],
    ["audio/flac", "flac"],
  ]);

  // Keep only the "type/subtype" part: drop parameters and normalize case.
  const [rawType = ""] = mimeType.split(";", 1);
  const essence = rawType.trim().toLowerCase();

  return mimeToExt.get(essence) ?? "audio";
}
/**
 * Helper function to get the full English language name from a language code.
 *
 * Matching is done on the primary subtag, case-insensitively, so `"EN"`,
 * `"en-US"` and `"en_GB"` all resolve to `"English"`.
 *
 * @param langCode - Language code such as "en", "es" or "zh-CN"
 * @returns The language name, or `langCode` unchanged when the code is not in the table
 */
function getLanguageName(langCode: string): string {
  // A Map (rather than a plain object) so inherited keys such as
  // "constructor" fall through to the raw-code fallback instead of returning
  // a non-string value.
  const langMap = new Map<string, string>([
    ["en", "English"],
    ["es", "Spanish"],
    ["fr", "French"],
    ["de", "German"],
    ["it", "Italian"],
    ["pt", "Portuguese"],
    ["ru", "Russian"],
    ["ja", "Japanese"],
    ["ko", "Korean"],
    ["zh", "Chinese"],
    ["ar", "Arabic"],
    ["hi", "Hindi"],
    ["nl", "Dutch"],
    ["pl", "Polish"],
    ["tr", "Turkish"],
    ["sv", "Swedish"],
    ["da", "Danish"],
    ["no", "Norwegian"],
    ["fi", "Finnish"],
  ]);

  // Reduce e.g. "zh-Hans-CN" or "pt_BR" to the primary subtag ("zh", "pt").
  const [primary = ""] = langCode.trim().toLowerCase().split(/[-_]/, 1);

  return langMap.get(primary) ?? langCode;
}
- /**
- * Example tRPC procedure implementation:
- *
- * ```ts
- * // In server/routers.ts
- * import { transcribeAudio } from "./_core/voiceTranscription";
- *
- * export const voiceRouter = router({
- * transcribe: protectedProcedure
- * .input(z.object({
- * audioUrl: z.string(),
- * language: z.string().optional(),
- * prompt: z.string().optional(),
- * }))
- * .mutation(async ({ input, ctx }) => {
- * const result = await transcribeAudio(input);
- *
- * // Check if it's an error
- * if ('error' in result) {
- * throw new TRPCError({
- * code: 'BAD_REQUEST',
- * message: result.error,
- * cause: result,
- * });
- * }
- *
- * // Optionally save transcription to database
- * await db.insert(transcriptions).values({
- * userId: ctx.user.id,
- * text: result.text,
- * duration: result.duration,
- * language: result.language,
- * audioUrl: input.audioUrl,
- * createdAt: new Date(),
- * });
- *
- * return result;
- * }),
- * });
- * ```
- */
|