I'm trying to capture audio playing from a browser tab (system audio) in a FlutterFlow web app, record it, and then send it to OpenAI's Whisper API for transcription. However, I've run into several issues:
- Using `getDisplayMedia` with `{audio: true, video: true}` does not show the "Share audio" checkbox in the browser prompt. Without "Share audio" selected, the resulting MediaStream contains no audio track.
- I tried multiple browsers (Chrome, Edge, Brave) and different MIME types (`audio/webm; codecs=opus`, `audio/ogg; codecs=opus`) to ensure compatibility with MediaRecorder and the Whisper API.
- Even when a recording is produced, it either contains no audio track or is rejected by the Whisper API due to format issues.
The core issue: I need a reliable way to capture the system or tab audio so that I can record it and send it to Whisper for transcription directly in FlutterFlow's environment.
Is there a known workaround or a best practice for capturing system (or tab) audio in FlutterFlow apps running on the web? Has anyone successfully managed to include the "Share audio" option consistently or found another approach for capturing system audio?
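For reference, the capture path in the widget below boils down to the following stripped-down sketch (the helper name _captureTabAudioSketch is just illustrative; it assumes Chromium's behaviour, where the audio checkbox is only offered when a browser tab is picked as the source and asking for audio without video is rejected):

import 'dart:html' as html;
import 'dart:js_util' as js_util;

Future<void> _captureTabAudioSketch() async {
  final mediaDevices = html.window.navigator.mediaDevices!;
  // Both audio and video have to be requested; audio alone is rejected.
  final stream = await js_util.promiseToFuture<html.MediaStream>(
      js_util.callMethod(mediaDevices, 'getDisplayMedia', [
    {'audio': true, 'video': true}
  ]));
  // An audio track is only present if the user ticked "Share tab audio"
  // in the picker; otherwise only the video track comes back.
  if (stream.getAudioTracks().isEmpty) {
    print('No audio track: the share-audio checkbox was missing or unticked.');
  }
}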
TabAudioListenerButton (Custom Widget)
// Automatic FlutterFlow imports
import '/backend/backend.dart';
import '/backend/schema/structs/index.dart';
import '/flutter_flow/flutter_flow_theme.dart';
import '/flutter_flow/flutter_flow_util.dart';
import '/custom_code/widgets/index.dart'; // Imports other custom widgets
import '/flutter_flow/custom_functions.dart'; // Imports custom functions
import 'package:flutter/material.dart';
// Begin custom widget code
// DO NOT REMOVE OR MODIFY THE CODE ABOVE!
import 'dart:html' as html;
import 'dart:js_util' as js_util;
import 'dart:typed_data';
import 'package:flutter/foundation.dart' show kIsWeb;
import 'package:http/http.dart' as http;
import 'dart:convert';
class TabAudioListenerButton extends StatefulWidget {
final void Function()? onAudioStreamObtained;
final void Function(String error)? onError;
/// Callback called after transcription is available (once recording stops)
final void Function(String transcription)? onTranscriptionUpdate;
final double? width;
final double? height;
const TabAudioListenerButton({
Key? key,
this.onAudioStreamObtained,
this.onError,
this.onTranscriptionUpdate,
this.width,
this.height,
}) : super(key: key);
@override
_TabAudioListenerButtonState createState() => _TabAudioListenerButtonState();
}
class _TabAudioListenerButtonState extends State<TabAudioListenerButton> {
html.MediaStream? _mediaStream;
Object? _mediaRecorder;
bool _isRecording = false;
List<dynamic> _chunks = [];
@override
void dispose() {
_stopRecording();
super.dispose();
}
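// Prompts the user to pick a screen/window/tab via getDisplayMedia.
// Note: Chromium-based browsers only offer the "Share tab audio" checkbox
// when a browser tab is selected as the source; picking a window yields no
// audio, and "share system audio" is generally only offered for full-screen
// capture on Windows. Firefox and Safari currently do not expose tab or
// system audio through getDisplayMedia at all.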
Future<html.MediaStream?> getDisplayMedia() async {
final mediaDevices = html.window.navigator.mediaDevices;
if (mediaDevices == null) return null;
try {
// Request a display-capture stream. Audio is only included if the user
// ticks the "Share audio" / "Share tab audio" checkbox in the picker;
// video must be requested as well, or most browsers reject the call.
final constraints = {'audio': true, 'video': true};
final stream = await js_util.promiseToFuture<html.MediaStream?>(
js_util.callMethod(mediaDevices, 'getDisplayMedia', [constraints]));
return stream;
} catch (e) {
widget.onError?.call(e.toString());
return null;
}
}
Future<void> _startCaptureAndRecord() async {
if (!kIsWeb) {
widget.onError?.call('Not running on web platform.');
return;
}
final mediaStream = await getDisplayMedia();
if (mediaStream != null) {
setState(() {
_mediaStream = mediaStream;
});
widget.onAudioStreamObtained?.call();
_startRecording();
} else {
widget.onError?.call('Failed to get media stream.');
}
}
void _startRecording() {
if (_mediaStream == null || _mediaStream!.getAudioTracks().isEmpty) {
widget.onError?.call('No audio track found.');
return;
}
String mimeType = 'audio/webm; codecs=opus';
bool isSupported = js_util.callMethod(
js_util.getProperty(html.window, 'MediaRecorder'),
'isTypeSupported',
[mimeType]) as bool;
if (!isSupported) {
mimeType = 'audio/ogg; codecs=opus';
isSupported = js_util.callMethod(
js_util.getProperty(html.window, 'MediaRecorder'),
'isTypeSupported',
[mimeType]) as bool;
if (!isSupported) {
widget.onError?.call('No supported mimeType found.');
return;
}
}
_chunks.clear();
final mediaRecorder = js_util
.callConstructor(js_util.getProperty(html.window, 'MediaRecorder'), [
_mediaStream,
{
'mimeType': mimeType,
}
]);
js_util.setProperty(mediaRecorder, 'ondataavailable',
js_util.allowInterop((event) {
final chunk = js_util.getProperty(event, 'data');
if (chunk != null) {
_chunks.add(chunk);
}
}));
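// When recording stops: assemble the buffered chunks into a single Blob,
// convert it to bytes, and send the result to Whisper for transcription.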
js_util.setProperty(mediaRecorder, 'onstop',
js_util.allowInterop((event) async {
if (_chunks.isNotEmpty) {
final blob =
js_util.callConstructor(js_util.getProperty(html.window, 'Blob'), [
_chunks,
{'type': mimeType}
]);
// Download the blob locally (handy for checking what was actually captured).
final url = html.Url.createObjectUrl(blob);
final anchor = html.document.createElement('a') as html.AnchorElement;
anchor.href = url;
anchor.download = mimeType.contains('ogg') ? 'audio_recording.ogg' : 'audio_recording.webm';
html.document.body!.append(anchor);
anchor.click();
anchor.remove();
html.Url.revokeObjectUrl(url);
final arrayBuffer = await js_util
.promiseToFuture(js_util.callMethod(blob, 'arrayBuffer', []));
final bytes = Uint8List.view((arrayBuffer as ByteBuffer));
final transcription = await transcribeAudioChunk(bytes, mimeType);
if (transcription != null && transcription.isNotEmpty) {
widget.onTranscriptionUpdate?.call(transcription);
}
_chunks.clear();
}
}));
js_util.callMethod(mediaRecorder, 'start', []);
setState(() {
_mediaRecorder = mediaRecorder;
_isRecording = true;
});
}
void _stopRecording() {
if (_mediaRecorder != null) {
js_util.callMethod(_mediaRecorder!, 'stop', []);
}
_mediaStream?.getTracks().forEach((track) => track.stop());
// dispose() also calls this, so avoid calling setState after unmount.
if (!mounted) return;
setState(() {
_mediaRecorder = null;
_mediaStream = null;
_isRecording = false;
});
}
@override
Widget build(BuildContext context) {
final isCapturing = _mediaStream != null && _isRecording;
Widget content = ElevatedButton(
onPressed: isCapturing ? _stopRecording : _startCaptureAndRecord,
style: ElevatedButton.styleFrom(
backgroundColor: isCapturing ? Colors.red : Colors.green,
),
child: Icon(
isCapturing ? Icons.stop : Icons.play_arrow,
color: Colors.white,
),
);
if (widget.width != null || widget.height != null) {
content = SizedBox(
width: widget.width,
height: widget.height,
child: Center(child: content),
);
} else {
content = Center(child: content);
}
return content;
}
Future<String?> transcribeAudioChunk(Uint8List audioData, String mimeType) async {
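// NOTE: the key is embedded client-side here only for testing; in production
// this call should go through a backend so the key never ships to the browser.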
const openaiApiKey = "YOUR_OPENAI_API_KEY";
final uri = Uri.parse('https://api.openai.com/v1/audio/transcriptions');
final request = http.MultipartRequest('POST', uri);
request.headers['Authorization'] = 'Bearer $openaiApiKey';
// Name the upload after the actual container so Whisper recognizes the format.
final extension = mimeType.contains('ogg') ? 'ogg' : 'webm';
request.files.add(http.MultipartFile.fromBytes('file', audioData,
filename: 'audio.$extension'));
request.fields['model'] = 'whisper-1';
final response = await request.send();
if (response.statusCode == 200) {
final respStr = await response.stream.bytesToString();
final jsonResp = json.decode(respStr) as Map<String, dynamic>;
return jsonResp['text'] as String?;
} else {
final respStr = await response.stream.bytesToString();
print('Error transcribing audio: ${response.statusCode} - $respStr');
return null;
}
}
}
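For context, this is roughly how I place the widget on a page (the callback bodies here are just placeholders for my FlutterFlow actions):

TabAudioListenerButton(
  width: 56,
  height: 56,
  onAudioStreamObtained: () => debugPrint('Tab audio stream obtained'),
  onError: (error) => debugPrint('Capture error: $error'),
  onTranscriptionUpdate: (text) => debugPrint('Whisper transcription: $text'),
)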