Fading Coder

One Final Commit for the Last Sprint

Home > Tech > Content

Real-Time Multiplayer Voice Communication in Unity Using UDP Streaming

Tech May 10 2

Implementing real-time voice communication in Unity requires capturing audio input, encoding and transmitting it over the network, and decoding and playing it on remote clients. This approach avoids reliance on third-party SDKs while maintaining low-latency transmission suitable for local or LAN-based multiplayer scenarios.

The core workflow consists of three synchronized components: microphone capture, packetized network streaming, and client-side playback. Unlike file-based recording, streaming processes audio in small chunks to minimize delay and memory overhead.

First, define a reusable audio buffer manager that handles raw PCM data acquisition:

using UnityEngine;
using System.Collections.Generic;

/// <summary>
/// Pulls fixed-size chunks of 16-bit PCM from the default microphone.
/// </summary>
/// <remarks>
/// The microphone is started once as a looping clip and then read incrementally.
/// Starting and stopping the device for every chunk cannot work: Unity only accepts
/// whole-second recording lengths (so a 20 ms request rounds down to an invalid 0 s),
/// and a clip that was started a moment ago contains no recorded audio yet.
/// All members use Unity's Microphone/AudioClip API and must therefore be called
/// from Unity's main thread.
/// </remarks>
public class AudioStreamBuffer
{
    private const int SampleRate = 16000; // Reduced for bandwidth efficiency
    private const int Channels = 1;       // NOTE(review): assumes the device records mono - confirm via AudioClip.channels
    private const int BitsPerSample = 16;

    // Length of the looping microphone clip. A requested chunk must be shorter than this.
    private const int RingBufferSeconds = 1;

    private static AudioClip _micClip;
    private static int _readPosition; // next unread sample index inside _micClip

    /// <summary>
    /// Returns the next <paramref name="durationMs"/> milliseconds of microphone audio
    /// as signed 16-bit samples.
    /// </summary>
    /// <param name="durationMs">Chunk length in milliseconds; must be shorter than the ring buffer.</param>
    /// <returns>
    /// The PCM samples, or an empty array when no microphone is available, the duration
    /// is not usable, or fewer than the requested samples have been recorded so far.
    /// The first call starts the microphone and normally returns an empty array.
    /// </returns>
    public static short[] CaptureChunk(int durationMs = 20)
    {
        int samplesToCapture = (SampleRate * durationMs) / 1000;
        if (samplesToCapture <= 0) return new short[0];
        if (!EnsureRecording()) return new short[0];

        int ringLength = _micClip.samples;
        if (samplesToCapture >= ringLength) return new short[0];

        // The recording head wraps around the looping clip, so the distance to it
        // has to be computed modulo the clip length.
        int writePosition = Microphone.GetPosition(null);
        int available = writePosition - _readPosition;
        if (available < 0) available += ringLength;
        if (available < samplesToCapture) return new short[0];

        float[] tempBuffer = new float[samplesToCapture * Channels];
        // AudioClip.GetData wraps to the start of the clip when the read runs past its end.
        _micClip.GetData(tempBuffer, _readPosition);
        _readPosition = (_readPosition + samplesToCapture) % ringLength;

        short[] pcmData = new short[tempBuffer.Length];
        for (int i = 0; i < tempBuffer.Length; i++)
        {
            // Clamp first: a sample outside [-1, 1] would otherwise wrap around in the
            // short cast and produce a full-scale click.
            float sample = Mathf.Clamp(tempBuffer[i], -1f, 1f);
            pcmData[i] = (short)(sample * short.MaxValue);
        }

        return pcmData;
    }

    /// <summary>
    /// Stops the microphone and releases the recording clip. The next call to
    /// <see cref="CaptureChunk"/> starts a fresh recording.
    /// </summary>
    public static void StopCapture()
    {
        if (Microphone.IsRecording(null)) Microphone.End(null);
        if (_micClip != null) UnityEngine.Object.Destroy(_micClip);

        _micClip = null;
        _readPosition = 0;
    }

    // Starts the looping microphone clip if it is not already running.
    // Returns false when there is no capture device or the recording could not be started.
    private static bool EnsureRecording()
    {
        if (_micClip != null && Microphone.IsRecording(null)) return true;
        if (Microphone.devices.Length == 0) return false;

        _micClip = Microphone.Start(null, true, RingBufferSeconds, SampleRate);
        _readPosition = 0;
        return _micClip != null;
    }
}

Next, implement a transmitter that sends encoded audio frames via UDP at regular intervals. This version uses UdpClient with non-blocking send logic and includes basic sequence numbering for frame ordering:

using System.Net;
using System.Net.Sockets;
using System.Threading;
using UnityEngine;

/// <summary>
/// Captures microphone frames and sends them to a remote endpoint as UDP datagrams.
/// </summary>
/// <remarks>
/// Capture is pumped from <c>Update</c> rather than a worker thread because
/// <c>AudioStreamBuffer.CaptureChunk</c> uses Unity's Microphone API, which may only
/// be called from the main thread. A UDP send does not wait for the receiver.
/// NOTE(review): UdpClient.Send with a host name resolves it on every call, so
/// <c>targetAddress</c> should be an IP literal to keep the send cheap.
/// </remarks>
public class VoiceTransmitter : MonoBehaviour
{
    // Duration of audio carried by one datagram.
    private const int FrameDurationMs = 30;

    // Upper bound on frames sent per rendered frame, so a long hitch cannot turn
    // into an unbounded burst (or an endless loop) inside a single Update.
    private const int MaxFramesPerUpdate = 4;

    [SerializeField] private string targetAddress = "127.0.0.1";
    [SerializeField] private int targetPort = 8080;

    private UdpClient _client;
    private bool _isStreaming;
    private bool _sendFailureLogged;
    private int _sequenceId;

    /// <summary>
    /// Starts sending microphone frames. Does nothing when already streaming.
    /// </summary>
    public void BeginTransmission()
    {
        if (_isStreaming) return;

        // A fresh socket per session: StopTransmission closes the previous one,
        // and a closed UdpClient cannot be reused.
        _client = new UdpClient();
        _sendFailureLogged = false;
        _isStreaming = true;
    }

    /// <summary>
    /// Stops sending and releases the socket. Safe to call when not streaming.
    /// </summary>
    public void StopTransmission()
    {
        _isStreaming = false;
        _client?.Close();
        _client = null;
    }

    void Update()
    {
        if (!_isStreaming) return;

        for (int i = 0; i < MaxFramesPerUpdate; i++)
        {
            short[] rawFrame = AudioStreamBuffer.CaptureChunk(FrameDurationMs);
            if (rawFrame.Length == 0) break; // nothing (more) recorded yet

            if (!TrySend(BuildPacket(rawFrame, ++_sequenceId))) break;
        }
    }

    void OnDestroy()
    {
        StopTransmission();
    }

    // Sends one datagram. Returns false when the socket reported an error; the frame
    // is dropped (voice is best-effort) and the failure is logged once per outage.
    private bool TrySend(byte[] packet)
    {
        try
        {
            _client.Send(packet, packet.Length, targetAddress, targetPort);
            _sendFailureLogged = false;
            return true;
        }
        catch (SocketException ex)
        {
            if (!_sendFailureLogged)
            {
                Debug.LogWarning($"VoiceTransmitter: dropping frames, send failed ({ex.SocketErrorCode}).");
                _sendFailureLogged = true;
            }
            return false;
        }
    }

    /// <summary>
    /// Serializes one frame as: 4-byte sequence number, 4-byte sample count, then the
    /// 16-bit samples. All fields use the platform's native byte order.
    /// </summary>
    /// <param name="data">PCM samples to send.</param>
    /// <param name="seq">Sequence number of this frame.</param>
    /// <returns>The datagram payload.</returns>
    private byte[] BuildPacket(short[] data, int seq)
    {
        int payloadSize = data.Length * sizeof(short);
        byte[] packet = new byte[8 + payloadSize]; // 4-byte seq + 4-byte len + payload

        System.Buffer.BlockCopy(System.BitConverter.GetBytes(seq), 0, packet, 0, 4);
        System.Buffer.BlockCopy(System.BitConverter.GetBytes(data.Length), 0, packet, 4, 4);
        System.Buffer.BlockCopy(data, 0, packet, 8, payloadSize);

        return packet;
    }
}

On the receiving end, use a dedicated listener component that decodes incoming packets into short AudioClips and queues them for playback through Unity’s AudioSource:

using UnityEngine;
using System.Collections.Generic;
using System.Net.Sockets;
using System.Net;

/// <summary>
/// Listens for voice datagrams on a UDP port and plays them back, one short
/// AudioClip per datagram, through the AudioSource on the same GameObject.
/// </summary>
/// <remarks>
/// Expected datagram layout: 4-byte sequence number, 4-byte sample count, then that
/// many 16-bit mono samples in the platform's native byte order. The sequence number
/// is currently not used for reordering.
/// Receiving and playback both run on Unity's main thread (coroutine and Update),
/// so the playback queue needs no locking.
/// </remarks>
[RequireComponent(typeof(AudioSource))]
public class VoiceReceiver : MonoBehaviour
{
    private const int HeaderSize = 8;     // 4-byte seq + 4-byte sample count
    private const int SampleRate = 16000;

    // Oldest clips are dropped beyond this, so latency and memory stay bounded
    // when datagrams arrive faster than they are played.
    private const int MaxQueuedClips = 16;

    [SerializeField] private int listenPort = 8080;

    private AudioSource _audioSource;
    private UdpClient _listener;
    private AudioClip _currentClip;
    private readonly Queue<AudioClip> _playbackQueue = new Queue<AudioClip>();

    void Start()
    {
        _audioSource = GetComponent<AudioSource>();

        try
        {
            _listener = new UdpClient(listenPort);
        }
        catch (SocketException ex)
        {
            Debug.LogError($"VoiceReceiver: cannot listen on port {listenPort} ({ex.SocketErrorCode}).");
            enabled = false;
            return;
        }

        StartCoroutine(ReceiveLoop());
    }

    // Drains every datagram that is already waiting, once per frame.
    // UdpClient.Receive blocks until data arrives, so it is only called while
    // Available reports pending bytes; otherwise the main thread would freeze.
    private System.Collections.IEnumerator ReceiveLoop()
    {
        while (_listener != null)
        {
            try
            {
                while (_listener.Available > 0)
                {
                    IPEndPoint remote = null;
                    byte[] received = _listener.Receive(ref remote);

                    AudioClip clip = DecodeClip(received);
                    if (clip != null) Enqueue(clip);
                }
            }
            catch (SocketException ex)
            {
                Debug.LogWarning($"VoiceReceiver: receive failed, stopping ({ex.SocketErrorCode}).");
                break;
            }

            yield return null;
        }
    }

    // Turns one datagram into a playable clip, or returns null when the datagram is
    // malformed. The sample count comes from the network and is validated against the
    // bytes actually received before anything is allocated.
    private AudioClip DecodeClip(byte[] received)
    {
        if (received == null || received.Length < HeaderSize) return null;

        int length = System.BitConverter.ToInt32(received, 4);
        int samplesInPayload = (received.Length - HeaderSize) / sizeof(short);
        if (length <= 0 || length > samplesInPayload) return null;

        // AudioClip.SetData takes floats in [-1, 1], not raw 16-bit samples.
        float[] samples = new float[length];
        for (int i = 0; i < length; i++)
        {
            short pcm = System.BitConverter.ToInt16(received, HeaderSize + i * sizeof(short));
            samples[i] = pcm / 32768f;
        }

        AudioClip clip = AudioClip.Create("streamed_voice", length, 1, SampleRate, false);
        clip.SetData(samples, 0);
        return clip;
    }

    // Adds a clip to the playback queue, discarding the oldest ones when it is full.
    private void Enqueue(AudioClip clip)
    {
        while (_playbackQueue.Count >= MaxQueuedClips)
        {
            Destroy(_playbackQueue.Dequeue());
        }

        _playbackQueue.Enqueue(clip);
    }

    void Update()
    {
        if (_audioSource.isPlaying || _playbackQueue.Count == 0) return;

        // Clips created at runtime are not garbage collected; release the one
        // that just finished before replacing it.
        if (_currentClip != null) Destroy(_currentClip);

        _currentClip = _playbackQueue.Dequeue();
        _audioSource.clip = _currentClip;
        _audioSource.Play();
    }

    void OnDestroy()
    {
        _listener?.Close(); // frees the port for the next listener
        _listener = null;

        while (_playbackQueue.Count > 0)
        {
            Destroy(_playbackQueue.Dequeue());
        }

        if (_currentClip != null) Destroy(_currentClip);
    }
}

This implementation prioritizes responsiveness over fidelity—reducing sample rate and disabling stereo cuts bandwidth usage by over 60% compared to default Unity microphone settings. For production deployment, consider adding forward error correction, jitter buffering, or integration with WebRTC for NAT traversal and adaptive bitrate control.

Related Articles

Understanding Strong and Weak References in Java

Strong References Strong references are the most prevalent type of object referencing in Java. When an object has a strong reference pointing to it, the garbage collector will not reclaim its memory. F...

Comprehensive Guide to SSTI Explained with Payload Bypass Techniques

Introduction Server-Side Template Injection (SSTI) is a vulnerability in web applications where user input is improperly handled within the template engine and executed on the server. This exploit can r...

Implement Image Upload Functionality for Django Integrated TinyMCE Editor

Django’s Admin panel is highly user-friendly, and pairing it with TinyMCE, an effective rich text editor, simplifies content management significantly. Combining the two is particularly useful for bloggi...

Leave a Comment

Anonymous

◎Feel free to join the discussion and share your thoughts.