Real-Time Multiplayer Voice Communication in Unity Using UDP Streaming
Implementing real-time voice communication in Unity requires capturing audio input, encoding and transmitting it over the network, and decoding and playing it on remote clients. This approach avoids reliance on third-party SDKs while maintaining low-latency transmission suitable for local or LAN-based multiplayer scenarios.
The core workflow consists of three synchronized components: microphone capture, packetized network streaming, and client-side playback. Unlike file-based recording, streaming processes audio in small chunks to minimize delay and memory overhead.
First, define a reusable audio buffer manager that handles raw PCM data acquisition:
using UnityEngine;
using System.Collections.Generic;
public class AudioStreamBuffer
{
    // Mono 16 kHz / 16-bit PCM: low bandwidth while staying intelligible for voice.
    private const int SampleRate = 16000; // Reduced for bandwidth efficiency
    private const int Channels = 1;
    private const int BitsPerSample = 16;

    /// <summary>
    /// Records a short chunk from the default microphone (blocking) and returns
    /// it as 16-bit signed PCM samples. Returns an empty array when no
    /// microphone device is available.
    /// </summary>
    /// <param name="durationMs">Chunk length in milliseconds.</param>
    /// <returns>Mono PCM samples, <c>SampleRate * durationMs / 1000</c> long.</returns>
    public static short[] CaptureChunk(int durationMs = 20)
    {
        int samplesToCapture = (SampleRate * durationMs) / 1000;
        // BUG FIX: the original passed durationMs / 1000 as the clip length,
        // which integer-truncates to 0 seconds for any sub-second chunk and
        // yields an unusable zero-length clip. Round up and request >= 1 s.
        int clipSeconds = Mathf.Max(1, (durationMs + 999) / 1000);
        AudioClip clip = Microphone.Start(null, false, clipSeconds, SampleRate);
        if (clip == null) return new short[0];
        // BUG FIX: the original read the clip immediately after Start, before
        // the device had produced any samples, so GetData returned silence.
        // Spin until enough samples have been recorded. NOTE(review): a busy
        // wait stalls the calling thread for ~durationMs; acceptable here
        // because CaptureChunk is invoked from the transmitter's worker
        // thread, not Unity's main thread.
        while (Microphone.GetPosition(null) < samplesToCapture) { }
        float[] tempBuffer = new float[samplesToCapture * Channels];
        clip.GetData(tempBuffer, 0);
        Microphone.End(null);
        short[] pcmData = new short[tempBuffer.Length];
        for (int i = 0; i < tempBuffer.Length; i++)
        {
            // Clamp before scaling so an out-of-range float cannot wrap the
            // short and produce a loud pop.
            float clamped = Mathf.Clamp(tempBuffer[i], -1f, 1f);
            pcmData[i] = (short)(clamped * short.MaxValue);
        }
        return pcmData;
    }
}
Next, implement a transmitter that sends encoded audio frames via UDP at regular intervals. This version performs blocking sends on a dedicated background thread (keeping Unity's main thread responsive) and includes basic sequence numbering for frame ordering:
using System.Net.Sockets;
using System.Net;
using System.Threading;
/// <summary>
/// Captures microphone chunks on a background thread and streams them as
/// sequence-numbered UDP packets to a configured endpoint.
/// Packet layout: [4-byte sequence id][4-byte sample count][PCM16 payload].
/// </summary>
public class VoiceTransmitter : MonoBehaviour
{
    [SerializeField] private string targetAddress = "127.0.0.1";
    [SerializeField] private int targetPort = 8080;
    private UdpClient _client;
    private Thread _sendThread;
    private volatile bool _isStreaming = false;
    private int _sequenceId = 0;

    void Start()
    {
        _client = new UdpClient();
    }

    /// <summary>Starts the background send loop. Safe to call repeatedly.</summary>
    public void BeginTransmission()
    {
        if (_isStreaming) return;
        _isStreaming = true;
        // Background thread so a killed play session cannot be kept alive by it.
        _sendThread = new Thread(SendLoop) { IsBackground = true };
        _sendThread.Start();
    }

    /// <summary>Signals the send loop to stop, waits briefly, then closes the socket.</summary>
    public void StopTransmission()
    {
        _isStreaming = false;
        _sendThread?.Join(100);
        _client?.Close();
    }

    // BUG FIX: the original leaked the send thread and socket when the
    // component was destroyed without an explicit StopTransmission call
    // (scene unload, editor stop).
    void OnDestroy()
    {
        StopTransmission();
    }

    // Worker loop: capture a 30 ms chunk, packetize, send, pause.
    private void SendLoop()
    {
        while (_isStreaming)
        {
            short[] rawFrame = AudioStreamBuffer.CaptureChunk(30);
            if (rawFrame.Length == 0) continue;
            byte[] packet = BuildPacket(rawFrame, Interlocked.Increment(ref _sequenceId));
            try
            {
                _client.Send(packet, packet.Length, targetAddress, targetPort);
            }
            // BUG FIX: the original bare catch swallowed every exception type,
            // including ObjectDisposedException after StopTransmission closed
            // the socket, leaving the loop spinning. Exit on disposal; treat
            // socket errors as transient (best-effort UDP delivery).
            catch (System.ObjectDisposedException) { break; }
            catch (SocketException) { /* transient network failure: drop frame */ }
            // BUG FIX: comment said "~40 FPS", but 30 ms blocking capture plus
            // 25 ms sleep yields roughly 18 frames per second (~55 ms/frame).
            Thread.Sleep(25);
        }
    }

    /// <summary>
    /// Builds a wire packet: sequence id (4 bytes, little-endian), sample
    /// count (4 bytes), then the raw PCM16 payload.
    /// </summary>
    private byte[] BuildPacket(short[] data, int seq)
    {
        int payloadSize = data.Length * sizeof(short);
        byte[] packet = new byte[8 + payloadSize]; // 4-byte seq + 4-byte len + payload
        // BUG FIX: BitConverter was unqualified but System was never imported
        // in this snippet; fully qualify, matching the System.Buffer usage.
        System.Buffer.BlockCopy(System.BitConverter.GetBytes(seq), 0, packet, 0, 4);
        System.Buffer.BlockCopy(System.BitConverter.GetBytes(data.Length), 0, packet, 4, 4);
        System.Buffer.BlockCopy(data, 0, packet, 8, payloadSize);
        return packet;
    }
}
On the receiving end, use a dedicated listener component that decodes incoming packets on a background thread and queues them for playback through a standard Unity AudioSource:
using UnityEngine;
using System.Collections.Generic;
using System.Net.Sockets;
using System.Net;
/// <summary>
/// Listens for UDP voice packets, decodes the PCM16 payload to floats on a
/// background thread, and plays each frame as an AudioClip on the main thread.
/// Expects the VoiceTransmitter packet layout:
/// [4-byte seq][4-byte sample count][PCM16 payload].
/// </summary>
public class VoiceReceiver : MonoBehaviour
{
    [SerializeField] private int listenPort = 8080;
    private AudioSource _audioSource;
    private UdpClient _listener;
    private System.Threading.Thread _receiveThread;
    private volatile bool _running;
    // Decoded frames handed from the receive thread to the main thread.
    // Floats (not AudioClips) are queued because UnityEngine objects must be
    // created on the main thread.
    private readonly Queue<float[]> _frameQueue = new Queue<float[]>();
    private readonly object _queueLock = new object();

    void Start()
    {
        _audioSource = GetComponent<AudioSource>();
        _listener = new UdpClient(listenPort);
        // BUG FIX: the original ran UdpClient.Receive (a blocking call) inside
        // a coroutine. Coroutines execute on Unity's main thread, so the first
        // Receive froze the entire game until a packet arrived. Receive on a
        // real background thread instead.
        _running = true;
        _receiveThread = new System.Threading.Thread(ReceiveLoop) { IsBackground = true };
        _receiveThread.Start();
    }

    // Background worker: blocking receive, validate, decode PCM16 -> float.
    private void ReceiveLoop()
    {
        while (_running)
        {
            try
            {
                IPEndPoint remote = null;
                byte[] received = _listener.Receive(ref remote);
                if (received.Length < 8) continue;
                int sampleCount = System.BitConverter.ToInt32(received, 4);
                // BUG FIX: the original trusted the declared length without
                // checking it against the actual payload, so a truncated or
                // malicious packet threw from BlockCopy. Drop bad packets.
                if (sampleCount <= 0 || received.Length - 8 < sampleCount * sizeof(short))
                    continue;
                // BUG FIX: AudioClip.SetData requires float samples in [-1, 1];
                // the original passed a short[] (a compile error in Unity).
                // Convert here, off the main thread.
                float[] samples = new float[sampleCount];
                for (int i = 0; i < sampleCount; i++)
                {
                    short s = System.BitConverter.ToInt16(received, 8 + i * sizeof(short));
                    samples[i] = s / 32768f;
                }
                lock (_queueLock)
                {
                    _frameQueue.Enqueue(samples);
                }
            }
            catch (SocketException) { break; }            // listener closed or fatal error
            catch (System.ObjectDisposedException) { break; } // closed during shutdown
        }
    }

    void Update()
    {
        // Take at most one frame per Update; only swap clips when idle so the
        // current frame finishes playing.
        float[] frame = null;
        lock (_queueLock)
        {
            if (_frameQueue.Count > 0 && !_audioSource.isPlaying)
            {
                frame = _frameQueue.Dequeue();
            }
        }
        if (frame != null)
        {
            // BUG FIX: the original created the clip with PCM-reader callbacks
            // (which supplied no data) and then also called SetData; create a
            // plain non-streamed clip and fill it directly. Sample rate must
            // match the transmitter's 16 kHz capture rate.
            AudioClip clip = AudioClip.Create("streamed_voice", frame.Length, 1, 16000, false);
            clip.SetData(frame, 0);
            _audioSource.clip = clip;
            _audioSource.Play();
        }
    }

    // BUG FIX: the original never closed the listener; close it here so the
    // blocked Receive aborts and the thread exits on scene unload.
    void OnDestroy()
    {
        _running = false;
        _listener?.Close();
        _receiveThread?.Join(100);
    }
}
This implementation prioritizes responsiveness over fidelity—reducing sample rate and disabling stereo cuts bandwidth usage by over 60% compared to default Unity microphone settings. For production deployment, consider adding forward error correction, jitter buffering, or integration with WebRTC for NAT traversal and adaptive bitrate control.