| | import { useEffect, useState, useRef } from "react"; |
| | import { Mic, PhoneOff, ChevronDown } from "lucide-react"; |
| | import { INPUT_SAMPLE_RATE } from "./constants"; |
| |
|
| | import WORKLET from "./play-worklet.js"; |
| |
|
| | export default function App() { |
| | const [callStartTime, setCallStartTime] = useState(null); |
| | const [callStarted, setCallStarted] = useState(false); |
| | const [playing, setPlaying] = useState(false); |
| |
|
| | const [voice, setVoice] = useState("af_heart"); |
| | const [voices, setVoices] = useState([]); |
| |
|
| | const [isListening, setIsListening] = useState(false); |
| | const [isSpeaking, setIsSpeaking] = useState(false); |
| | const [listeningScale, setListeningScale] = useState(1); |
| | const [speakingScale, setSpeakingScale] = useState(1); |
| | const [ripples, setRipples] = useState([]); |
| |
|
| | const [ready, setReady] = useState(false); |
| | const [error, setError] = useState(null); |
| | const [elapsedTime, setElapsedTime] = useState("00:00"); |
| | const worker = useRef(null); |
| |
|
| | const node = useRef(null); |
| |
|
| | useEffect(() => { |
| | worker.current?.postMessage({ |
| | type: "set_voice", |
| | voice, |
| | }); |
| | }, [voice]); |
| |
|
| | useEffect(() => { |
| | if (!callStarted) { |
| | |
| | worker.current?.postMessage({ |
| | type: "end_call", |
| | }); |
| | } |
| | }, [callStarted]); |
| |
|
| | useEffect(() => { |
| | if (callStarted && callStartTime) { |
| | const interval = setInterval(() => { |
| | const diff = Math.floor((Date.now() - callStartTime) / 1000); |
| | const minutes = String(Math.floor(diff / 60)).padStart(2, "0"); |
| | const seconds = String(diff % 60).padStart(2, "0"); |
| | setElapsedTime(`${minutes}:${seconds}`); |
| | }, 1000); |
| | return () => clearInterval(interval); |
| | } else { |
| | setElapsedTime("00:00"); |
| | } |
| | }, [callStarted, callStartTime]); |
| |
|
| | useEffect(() => { |
| | worker.current ??= new Worker(new URL("./worker.js", import.meta.url), { |
| | type: "module", |
| | }); |
| |
|
| | const onMessage = ({ data }) => { |
| | console.log("Worker message:", data); |
| | if (data.error) { |
| | return onError(data.error); |
| | } |
| |
|
| | switch (data.type) { |
| | case "status": |
| | if (data.status === "recording_start") { |
| | setIsListening(true); |
| | setIsSpeaking(false); |
| | } else if (data.status === "recording_end") { |
| | setIsListening(false); |
| | } else if (data.status === "ready") { |
| | setVoices(data.voices); |
| | setReady(true); |
| | } |
| | break; |
| | case "output": |
| | if (!playing) { |
| | node.current?.port.postMessage(data.result.audio); |
| | setPlaying(true); |
| | setIsSpeaking(true); |
| | setIsListening(false); |
| | } |
| | break; |
| | } |
| | }; |
| | const onError = (err) => setError(err.message); |
| |
|
| | worker.current.addEventListener("message", onMessage); |
| | worker.current.addEventListener("error", onError); |
| |
|
| | return () => { |
| | worker.current.removeEventListener("message", onMessage); |
| | worker.current.removeEventListener("error", onError); |
| | }; |
| | }, []); |
| |
|
| | useEffect(() => { |
| | if (!callStarted) return; |
| |
|
| | let worklet; |
| | let inputAudioContext; |
| | let source; |
| | let ignore = false; |
| |
|
| | let outputAudioContext; |
| | const audioStreamPromise = navigator.mediaDevices.getUserMedia({ |
| | audio: { |
| | channelCount: 1, |
| | echoCancellation: true, |
| | autoGainControl: true, |
| | noiseSuppression: true, |
| | sampleRate: INPUT_SAMPLE_RATE, |
| | }, |
| | }); |
| |
|
| | audioStreamPromise |
| | .then(async (stream) => { |
| | if (ignore) return; |
| |
|
| | inputAudioContext = new (window.AudioContext || |
| | window.webkitAudioContext)({ |
| | sampleRate: INPUT_SAMPLE_RATE, |
| | }); |
| |
|
| | const analyser = inputAudioContext.createAnalyser(); |
| | analyser.fftSize = 256; |
| | source = inputAudioContext.createMediaStreamSource(stream); |
| | source.connect(analyser); |
| |
|
| | const inputDataArray = new Uint8Array(analyser.frequencyBinCount); |
| |
|
| | function calculateRMS(array) { |
| | let sum = 0; |
| | for (let i = 0; i < array.length; ++i) { |
| | const normalized = array[i] / 128 - 1; |
| | sum += normalized * normalized; |
| | } |
| | const rms = Math.sqrt(sum / array.length); |
| | return rms; |
| | } |
| |
|
| | await inputAudioContext.audioWorklet.addModule( |
| | new URL("./vad-processor.js", import.meta.url), |
| | ); |
| | worklet = new AudioWorkletNode(inputAudioContext, "vad-processor", { |
| | numberOfInputs: 1, |
| | numberOfOutputs: 0, |
| | channelCount: 1, |
| | channelCountMode: "explicit", |
| | channelInterpretation: "discrete", |
| | }); |
| |
|
| | source.connect(worklet); |
| | worklet.port.onmessage = (event) => { |
| | const { buffer } = event.data; |
| | worker.current?.postMessage({ type: "audio", buffer }); |
| | }; |
| |
|
| | outputAudioContext = new AudioContext({ |
| | sampleRate: 24000, |
| | }); |
| | outputAudioContext.resume(); |
| |
|
| | const blob = new Blob([`(${WORKLET.toString()})()`], { |
| | type: "application/javascript", |
| | }); |
| | const url = URL.createObjectURL(blob); |
| | await outputAudioContext.audioWorklet.addModule(url); |
| | URL.revokeObjectURL(url); |
| |
|
| | node.current = new AudioWorkletNode( |
| | outputAudioContext, |
| | "buffered-audio-worklet-processor", |
| | ); |
| |
|
| | node.current.port.onmessage = (event) => { |
| | if (event.data.type === "playback_ended") { |
| | setPlaying(false); |
| | setIsSpeaking(false); |
| | worker.current?.postMessage({ type: "playback_ended" }); |
| | } |
| | }; |
| |
|
| | const outputAnalyser = outputAudioContext.createAnalyser(); |
| | outputAnalyser.fftSize = 256; |
| |
|
| | node.current.connect(outputAnalyser); |
| | outputAnalyser.connect(outputAudioContext.destination); |
| |
|
| | const outputDataArray = new Uint8Array( |
| | outputAnalyser.frequencyBinCount, |
| | ); |
| |
|
| | function updateVisualizers() { |
| | analyser.getByteTimeDomainData(inputDataArray); |
| | const rms = calculateRMS(inputDataArray); |
| | const targetScale = 1 + Math.min(1.25 * rms, 0.25); |
| | setListeningScale((prev) => prev + (targetScale - prev) * 0.25); |
| |
|
| | outputAnalyser.getByteTimeDomainData(outputDataArray); |
| | const outputRMS = calculateRMS(outputDataArray); |
| | const targetOutputScale = 1 + Math.min(1.25 * outputRMS, 0.25); |
| | setSpeakingScale((prev) => prev + (targetOutputScale - prev) * 0.25); |
| |
|
| | requestAnimationFrame(updateVisualizers); |
| | } |
| | updateVisualizers(); |
| | }) |
| | .catch((err) => { |
| | setError(err.message); |
| | console.error(err); |
| | }); |
| |
|
| | return () => { |
| | ignore = true; |
| |
|
| | audioStreamPromise.then((stream) => |
| | stream.getTracks().forEach((track) => track.stop()), |
| | ); |
| | source?.disconnect(); |
| | worklet?.disconnect(); |
| | inputAudioContext?.close(); |
| |
|
| | outputAudioContext?.close(); |
| | }; |
| | }, [callStarted]); |
| |
|
| | useEffect(() => { |
| | if (!callStarted) return; |
| | const interval = setInterval(() => { |
| | const id = Date.now(); |
| | setRipples((prev) => [...prev, id]); |
| | setTimeout(() => { |
| | setRipples((prev) => prev.filter((r) => r !== id)); |
| | }, 1500); |
| | }, 1000); |
| | return () => clearInterval(interval); |
| | }, [callStarted]); |
| |
|
| | return ( |
| | <div className="h-screen min-h-[240px] flex items-center justify-center bg-gray-50 p-4 relative"> |
| | <div className="h-full max-h-[320px] w-[640px] bg-white rounded-xl shadow-lg p-8 flex items-center justify-between space-x-16"> |
| | <div className="text-green-700 w-[140px]"> |
| | <div className="text-xl font-bold flex justify-between"> |
| | {voices?.[voice]?.name} |
| | <span className="font-normal text-gray-500">{elapsedTime}</span> |
| | </div> |
| | <div className="text-base relative"> |
| | <button |
| | type="button" |
| | disabled={!ready} |
| | className={`w-full flex items-center justify-between border border-gray-300 rounded-md transition-colors ${ |
| | ready |
| | ? "bg-transparent hover:border-gray-400" |
| | : "bg-gray-100 opacity-50 cursor-not-allowed" |
| | }`} |
| | > |
| | <span className="px-2 py-1">Select voice</span> |
| | <ChevronDown className="absolute right-2" /> |
| | </button> |
| | <select |
| | value={voice} |
| | onChange={(e) => setVoice(e.target.value)} |
| | className="absolute inset-0 opacity-0 cursor-pointer" |
| | disabled={!ready} |
| | > |
| | {Object.entries(voices).map(([key, v]) => ( |
| | <option key={key} value={key}> |
| | {`${v.name} (${ |
| | v.language === "en-us" ? "American" : v.language |
| | } ${v.gender})`} |
| | </option> |
| | ))} |
| | </select> |
| | </div> |
| | </div> |
| | |
| | <div className="relative flex items-center justify-center w-32 h-32 flex-shrink-0 aspect-square"> |
| | {callStarted && |
| | ripples.map((id) => ( |
| | <div |
| | key={id} |
| | className="absolute inset-0 rounded-full border-2 border-green-200 pointer-events-none" |
| | style={{ animation: "ripple 1.5s ease-out forwards" }} |
| | /> |
| | ))} |
| | <div className="absolute z-10 text-lg text-gray-700"> |
| | {!ready ? "Loading..." : ""} |
| | {isListening && "Listening..."} |
| | {isSpeaking && "Speaking..."} |
| | </div> |
| | {/* Pulsing loader while initializing */} |
| | <div |
| | className={`absolute w-32 h-32 rounded-full bg-green-200 ${ |
| | !ready ? "animate-ping opacity-75" : "" |
| | }`} |
| | style={{ animationDuration: "1.5s" }} |
| | /> |
| | {/* Main rings */} |
| | <div |
| | className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out bg-green-300 ${ |
| | !ready ? "opacity-0" : "" |
| | }`} |
| | style={{ transform: `scale(${speakingScale})` }} |
| | /> |
| | <div |
| | className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out bg-green-200 ${ |
| | !ready ? "opacity-0" : "" |
| | }`} |
| | style={{ transform: `scale(${listeningScale})` }} |
| | /> |
| | </div> |
| | |
| | <div className="space-y-4 w-[140px]"> |
| | {callStarted ? ( |
| | <button |
| | className="flex items-center space-x-2 px-4 py-2 bg-red-100 text-red-700 rounded-md hover:bg-red-200" |
| | onClick={() => { |
| | setCallStarted(false); |
| | setCallStartTime(null); |
| | setPlaying(false); |
| | setIsListening(false); |
| | setIsSpeaking(false); |
| | }} |
| | > |
| | <PhoneOff className="w-5 h-5" /> |
| | <span>End call</span> |
| | </button> |
| | ) : ( |
| | <button |
| | className={`flex items-center space-x-2 px-4 py-2 rounded-md ${ |
| | ready |
| | ? "bg-blue-100 text-blue-700 hover:bg-blue-200" |
| | : "bg-blue-100 text-blue-700 opacity-50 cursor-not-allowed" |
| | }`} |
| | onClick={() => { |
| | setCallStartTime(Date.now()); |
| | setCallStarted(true); |
| | worker.current?.postMessage({ type: "start_call" }); |
| | }} |
| | disabled={!ready} |
| | > |
| | <span>Start call</span> |
| | </button> |
| | )} |
| | </div> |
| | </div> |
| | |
| | <div className="absolute bottom-4 text-sm"> |
| | Built with{" "} |
| | <a |
| | href="https://github.com/huggingface/transformers.js" |
| | rel="noopener noreferrer" |
| | target="_blank" |
| | className="text-blue-600 hover:underline" |
| | > |
| | 🤗 Transformers.js |
| | </a> |
| | </div> |
| | </div> |
| | ); |
| | } |
| |
|