FlowiseAI · SyncWithRaj · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/packages/components/package.json b/packages/components/package.json
@@ -38,7 +38,9 @@
         "@aws-sdk/client-bedrock-runtime": "3.966.0",
         "@aws-sdk/client-dynamodb": "^3.360.0",
         "@aws-sdk/client-kendra": "^3.750.0",
+        "@aws-sdk/client-polly": "^3.699.0",
         "@aws-sdk/client-s3": "^3.844.0",
+        "@aws-sdk/client-transcribe": "^3.699.0",
         "@aws-sdk/client-secrets-manager": "^3.699.0",
         "@aws-sdk/client-sns": "^3.699.0",
         "@aws-sdk/client-sts": "^3.699.0",

diff --git a/packages/components/src/speechToText.ts b/packages/components/src/speechToText.ts
@@ -5,13 +5,23 @@ import { AssemblyAI } from 'assemblyai'
 import { getFileFromStorage } from './storageUtils'
 import axios from 'axios'
 import Groq from 'groq-sdk'
+import { S3Client, PutObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'
+import {
+    TranscribeClient,
+    StartTranscriptionJobCommand,
+    GetTranscriptionJobCommand,
+    TranscriptionJobStatus,
+    MediaFormat,
+    DeleteTranscriptionJobCommand
+} from '@aws-sdk/client-transcribe'
 
 const SpeechToTextType = {
     OPENAI_WHISPER: 'openAIWhisper',
     ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
     LOCALAI_STT: 'localAISTT',
     AZURE_COGNITIVE: 'azureCognitive',
-    GROQ_WHISPER: 'groqWhisper'
+    GROQ_WHISPER: 'groqWhisper',
+    AWS_TRANSCRIBE: 'awsTranscribe'
 }
 
 export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@@ -125,6 +135,113 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
                 }
                 break
             }
+            case SpeechToTextType.AWS_TRANSCRIBE: {
+                // Batch flow: stage the audio in S3, run a Transcribe job on it, poll until it finishes, then clean up
+                const region = speechToTextConfig.region || 'us-east-1'
+                const s3BucketName = speechToTextConfig.s3BucketName as string
+                const languageCode = speechToTextConfig.languageCode || 'en-US'
+
+                if (!s3BucketName) {
+                    throw new Error('S3 Bucket Name is required for AWS Transcribe')
+                }
+
+                // Explicit credentials are optional; when absent the clients are built with the region only
+                const awsClientConfig: Record<string, any> = { region }
+                if (credentialData.awsKey && credentialData.awsSecret) {
+                    awsClientConfig.credentials = {
+                        accessKeyId: credentialData.awsKey,
+                        secretAccessKey: credentialData.awsSecret,
+                        ...(credentialData.awsSession && { sessionToken: credentialData.awsSession })
+                    }
+                }
+
+                const s3Client = new S3Client(awsClientConfig)
+                const transcribeClient = new TranscribeClient(awsClientConfig)
+
+                // Only treat the text after the last dot as an extension when the name actually contains a dot
+                const uploadName = upload.name || ''
+                const fileExtension = (uploadName.includes('.') ? uploadName.split('.').pop() || 'webm' : 'webm').toLowerCase()
+                // Timestamp plus random suffix makes the S3 key and job name unlikely to collide between requests
+                const s3Key = 'flowise-stt-temp/' + Date.now() + '-' + Math.random().toString(36).substring(2) + '.' + fileExtension
+                const jobName = 'flowise-' + Date.now() + '-' + Math.random().toString(36).substring(2)
+
+                try {
+                    await s3Client.send(
+                        new PutObjectCommand({
+                            Bucket: s3BucketName,
+                            Key: s3Key,
+                            Body: Buffer.from(audio_file),
+                            ContentType: upload.mime || 'audio/webm'
+                        })
+                    )
+
+                    // Determine media format from file extension; anything not in this list is submitted as webm
+                    const supportedFormats = ['webm', 'mp3', 'mp4', 'm4a', 'wav', 'flac', 'ogg', 'amr']
+                    const mediaFormat = (supportedFormats.includes(fileExtension) ? fileExtension : 'webm') as MediaFormat
+
+                    // Start transcription job
+                    await transcribeClient.send(
+                        new StartTranscriptionJobCommand({
+                            TranscriptionJobName: jobName,
+                            LanguageCode: languageCode,
+                            Media: {
+                                MediaFileUri: `s3://${s3BucketName}/${s3Key}`
+                            },
+                            MediaFormat: mediaFormat
+                        })
+                    )
+
+                    // Poll for completion with 60 second timeout
+                    const POLL_INTERVAL_MS = 3000
+                    const TIMEOUT_MS = 60000
+                    const startTime = Date.now()
+
+                    let transcriptText = ''
+                    let jobCompleted = false
+
+                    while (!jobCompleted) {
+                        if (Date.now() - startTime > TIMEOUT_MS) {
+                            throw new Error('AWS Transcribe job timed out after 60 seconds')
+                        }
+
+                        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
+
+                        const jobResult = await transcribeClient.send(
+                            new GetTranscriptionJobCommand({
+                                TranscriptionJobName: jobName
+                            })
+                        )
+
+                        const status = jobResult.TranscriptionJob?.TranscriptionJobStatus
+
+                        if (status === TranscriptionJobStatus.COMPLETED) {
+                            const transcriptUri = jobResult.TranscriptionJob?.Transcript?.TranscriptFileUri
+                            if (transcriptUri) {
+                                const transcriptResponse = await axios.get(transcriptUri)
+                                const transcriptData = transcriptResponse.data
+                                transcriptText = transcriptData?.results?.transcripts?.[0]?.transcript || ''
+                            }
+                            jobCompleted = true
+                        } else if (status === TranscriptionJobStatus.FAILED) {
+                            const failureReason = jobResult.TranscriptionJob?.FailureReason || 'Unknown error'
+                            throw new Error(`AWS Transcribe job failed: ${failureReason}`)
+                        }
+                        // IN_PROGRESS or QUEUED — continue polling
+                    }
+
+                    if (transcriptText) {
+                        return transcriptText
+                    }
+                } finally {
+                    // Runs on success, failure and timeout alike. Cleanup is best-effort: a failed delete must not
+                    // replace the transcript or mask the original error, so rejections are deliberately discarded.
+                    await s3Client.send(new DeleteObjectCommand({ Bucket: s3BucketName, Key: s3Key })).catch(() => undefined)
+                    await transcribeClient
+                        .send(new DeleteTranscriptionJobCommand({ TranscriptionJobName: jobName }))
+                        .catch(() => undefined)
+                }
+                break
+            }
         }
     } else {
         throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')

diff --git a/packages/components/src/textToSpeech.ts b/packages/components/src/textToSpeech.ts
@@ -2,12 +2,14 @@ import { ICommonObject } from './Interface'
 import { getCredentialData } from './utils'
 import OpenAI from 'openai'
 import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
+import { PollyClient, SynthesizeSpeechCommand, Engine, VoiceId } from '@aws-sdk/client-polly'
 import { Readable } from 'node:stream'
 import type { ReadableStream } from 'node:stream/web'
 
 const TextToSpeechType = {
     OPENAI_TTS: 'openai',
-    ELEVEN_LABS_TTS: 'elevenlabs'
+    ELEVEN_LABS_TTS: 'elevenlabs',
+    AMAZON_POLLY_TTS: 'amazonPolly'
 }
 
 export const convertTextToSpeechStream = async (
@@ -100,6 +102,51 @@ export const convertTextToSpeechStream = async (
                             })
                             break
                         }
+
+                        case TextToSpeechType.AMAZON_POLLY_TTS: {
+                            onStart('mp3') // called before the Polly request is sent; 'mp3' matches OutputFormat below
+
+                            const region = textToSpeechConfig.region || 'us-east-1' // us-east-1 when no region is configured
+                            const pollyClientConfig: Record<string, any> = { region }
+
+                            if (credentialData.awsKey && credentialData.awsSecret) {
+                                pollyClientConfig.credentials = {
+                                    accessKeyId: credentialData.awsKey,
+                                    secretAccessKey: credentialData.awsSecret,
+                                    ...(credentialData.awsSession && { sessionToken: credentialData.awsSession })
+                                }
+                            }
+
+                            const pollyClient = new PollyClient(pollyClientConfig) // credentials are set only when both key and secret exist
+
+                            const voiceId = (textToSpeechConfig.voice || 'Joanna') as VoiceId // default voice: Joanna
+                            const engine = (textToSpeechConfig.engine || 'neural') as Engine // default engine: neural
+
+                            const command = new SynthesizeSpeechCommand({
+                                Text: text,
+                                OutputFormat: 'mp3',
+                                VoiceId: voiceId,
+                                Engine: engine
+                            })
+
+                            const pollyResponse = await pollyClient.send(command, {
+                                abortSignal: abortController.signal // ties the request to abortController so it can be aborted
+                            })
+
+                            if (!pollyResponse.AudioStream) {
+                                throw new Error('Amazon Polly returned no audio stream')
+                            }
+
+                            // AudioStream is expected to be a Node.js Readable; any other value is treated as a web ReadableStream and converted
+                            const pollyStream = pollyResponse.AudioStream as unknown as Readable
+                            const stream =
+                                pollyStream instanceof Readable ? pollyStream : Readable.fromWeb(pollyStream as unknown as ReadableStream)
+
+                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 20, abortController, () => {
+                                streamDestroyed = true
+                            })
+                            break
+                        }
                     }
                 } else {
                     reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.'))
@@ -234,6 +281,31 @@ export const getVoices = async (provider: string, credentialId: string, options:
             }))
         }
 
+        case TextToSpeechType.AMAZON_POLLY_TTS: // hard-coded voice list; no Polly API call is made here
+            return [
+                { id: 'Joanna', name: 'Joanna (Female, US English)' },
+                { id: 'Matthew', name: 'Matthew (Male, US English)' },
+                { id: 'Ruth', name: 'Ruth (Female, US English)' },
+                { id: 'Stephen', name: 'Stephen (Male, US English)' },
+                { id: 'Ivy', name: 'Ivy (Female Child, US English)' },
+                { id: 'Kevin', name: 'Kevin (Male Child, US English)' },
+                { id: 'Kendra', name: 'Kendra (Female, US English)' },
+                { id: 'Kimberly', name: 'Kimberly (Female, US English)' },
+                { id: 'Salli', name: 'Salli (Female, US English)' },
+                { id: 'Joey', name: 'Joey (Male, US English)' },
+                { id: 'Justin', name: 'Justin (Male Child, US English)' },
+                { id: 'Gregory', name: 'Gregory (Male, US English)' },
+                { id: 'Danielle', name: 'Danielle (Female, US English)' },
+                { id: 'Amy', name: 'Amy (Female, British English)' },
+                { id: 'Brian', name: 'Brian (Male, British English)' },
+                { id: 'Emma', name: 'Emma (Female, British English)' },
+                { id: 'Lupe', name: 'Lupe (Female, US Spanish)' },
+                { id: 'Pedro', name: 'Pedro (Male, US Spanish)' },
+                { id: 'Lea', name: 'Léa (Female, French)' }, // Polly's VoiceId is the unaccented 'Lea'; only the display name is accented
+                { id: 'Vicki', name: 'Vicki (Female, German)' },
+                { id: 'Daniel', name: 'Daniel (Male, German)' }
+            ]
+
         default:
             throw new Error(`Unsupported TTS provider: ${provider}`)
     }

diff --git a/packages/server/src/controllers/text-to-speech/index.ts b/packages/server/src/controllers/text-to-speech/index.ts
@@ -17,7 +17,9 @@ const generateTextToSpeech = async (req: Request, res: Response) => {
             provider: bodyProvider,
             credentialId: bodyCredentialId,
             voice: bodyVoice,
-            model: bodyModel
+            model: bodyModel,
+            engine: bodyEngine,
+            region: bodyRegion
         } = req.body
 
         if (!text) {
@@ -27,7 +29,7 @@ const generateTextToSpeech = async (req: Request, res: Response) => {
             )
         }
 
-        let provider: string, credentialId: string, voice: string, model: string
+        let provider: string, credentialId: string, voice: string, model: string, engine: string, region: string
 
         if (chatflowId) {
             let workspaceId = req.user?.activeWorkspaceId
@@ -64,12 +66,16 @@ const generateTextToSpeech = async (req: Request, res: Response) => {
             credentialId = providerConfig.credentialId
             voice = providerConfig.voice
             model = providerConfig.model
+            engine = providerConfig.engine
+            region = providerConfig.region
         } else {
             // Use TTS config from request body
             provider = bodyProvider
             credentialId = bodyCredentialId
             voice = bodyVoice
             model = bodyModel
+            engine = bodyEngine
+            region = bodyRegion
         }
 
         if (!provider) {
@@ -103,7 +109,9 @@ const generateTextToSpeech = async (req: Request, res: Response) => {
             name: provider,
             credentialId: credentialId,
             voice: voice,
-            model: model
+            model: model,
+            engine: engine,
+            region: region
         }
 
         // Create and store AbortController

diff --git a/packages/server/src/services/text-to-speech/index.ts b/packages/server/src/services/text-to-speech/index.ts
@@ -7,7 +7,8 @@ import { databaseEntities } from '../../utils'
 
 export enum TextToSpeechProvider {
     OPENAI = 'openai',
-    ELEVEN_LABS = 'elevenlabs'
+    ELEVEN_LABS = 'elevenlabs',
+    AMAZON_POLLY = 'amazonPolly'
 }
 
 export interface TTSRequest {

diff --git a/packages/server/src/utils/buildChatflow.ts b/packages/server/src/utils/buildChatflow.ts
@@ -108,6 +108,7 @@ const generateTTSForResponseStream = async (
             const provider = config[providerKey]
             if (provider && provider.status === true) {
                 activeProviderConfig = {
+                    ...provider,
                     name: providerKey,
                     credentialId: provider.credentialId,
                     voice: provider.voice,

diff --git a/packages/ui/src/assets/images/aws.svg b/packages/ui/src/assets/images/aws.svg