Implementing WebRTC Video Transport in Java via JNI and Native APIs
Native API workflow
WebRTC’s native stack is typically wired together in these stages:
- Spin up the three internal threads: worker, network, and signaling. If you are substituting custom audio capture or codecs, initialize those components now.
- Build a PeerConnectionFactory; everything else derives from it (peer connections, sources, tracks).
- Create a PeerConnection with RTCConfiguration (ICE servers, policies). If port control is needed, inject a custom PortAllocator.
- Produce AudioSource/VideoSource. AudioSource accepts capture options (cricket::AudioOptions); VideoSource requires a capturer. For custom frames, implement a custom capturer.
- Wrap sources into tracks (AudioTrack/VideoTrack).
- Create a MediaStream and add the tracks.
- Attach the MediaStream to the PeerConnection.
- Drive signaling through observers and PeerConnection APIs: exchange SDP and ICE candidates.
Only steps 1–2 are "native-first"; from step 3 onward the flow mirrors the browser API.
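Seen from Java, the whole sequence collapses into a few calls against the wrappers described below (RtcBridge, PeerLink). A sketch with illustrative method names, not a fixed API:
RtcBridge rtc = new RtcBridge(audioCapturer, videoCapturer); // steps 1-2: threads, ADM, factory
PeerLink peer = rtc.createPeer(observer, "turn:turn.example.org:3478", user, secret);
peer.createOffer(sdp -> { // steps 3+ run natively; only signaling surfaces in Java
    peer.setLocalDescription(sdp);
    signaling.send(sdp); // deliver over your own signaling channel
});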
JNI vs JNA for WebRTC
JNA simplifies calling plain C functions from Java but falls short when you need native-to-Java callbacks (observers) and tight per-frame latency. JNI enables:
- Native code invoking Java observers (required by PeerConnection, DataChannel, etc.).
- Lower overhead call paths for per-frame video/audio marshaling.
For bidirectional callbacks and performance-sensitive media paths, prefer JNI.
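In practice each Java wrapper loads the shared library once, declares native methods, and keeps an opaque pointer to its C++ counterpart. A minimal sketch (signatures are illustrative; SdpObserver stands in for the project's observer type):
public final class PeerLink implements AutoCloseable {
    static { SharedLibLoader.load("rtc"); } // loader sketched under "Project layout"
    private long nativePtr; // address of the C++ PeerLink
    public native void createOffer(SdpObserver observer);
    public native void setLocalDescription(SessionDescription sdp);
    public native void setRemoteDescription(SessionDescription sdp);
    private native void free(long ptr);
    @Override public void close() { if (nativePtr != 0) { free(nativePtr); nativePtr = 0; } }
}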
Project layout
Java
- script/build-headers.sh: emits C headers for JNI from compiled classes (javah shipped with JDK 8/9; on JDK 10+ generate headers with javac -h instead)
#!/usr/bin/env bash
find ../path/to/rtc4j/core -maxdepth 1 -type f -name '*.class' \
| sed 's#.class$##' \
| sed 's#^#package.name.of.core.#' \
| xargs javah -classpath ../target/classes -d ../../cpp/src/jni/
- src/.../core: wrappers over core WebRTC types and capture:
- RtcBridge -> webrtc::PeerConnectionFactoryInterface
- PeerLink -> webrtc::PeerConnectionInterface
- DataPipe -> webrtc::DataChannelInterface
- src/.../model: POJOs for SDP, ICE, payloads
- src/.../utils: cross-platform shared-lib loader
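That loader can extract the bundled library from the jar and hand it to System.load. A minimal sketch, assuming the binaries ship under a /native resource path:
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public final class SharedLibLoader {
    private SharedLibLoader() {}
    public static void load(String name) {
        String mapped = System.mapLibraryName(name); // librtc.so / librtc.dylib
        try (InputStream in = SharedLibLoader.class.getResourceAsStream("/native/" + mapped)) {
            if (in == null) { System.loadLibrary(name); return; } // fall back to java.library.path
            Path tmp = Files.createTempFile("rtc4j-", mapped);
            Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);
            tmp.toFile().deleteOnExit();
            System.load(tmp.toAbsolutePath().toString());
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}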
C++
- src/jni: generated headers, JNI helpers
- src/media: custom audio device module and video capturer
- Audio: custom AudioDeviceModule injected into PeerConnectionFactory
- Video: custom capturer fed to CreateVideoSource
- H264 encoding via FFmpeg (libx264) and h264_nvenc when available
- src/rtc: implementations of Java wrappers
- src/rtc/network: custom SocketFactory for allowed ports and IP ranges
Native dependencies and build
CMake-based build with libwebrtc, FFmpeg, libjpeg-turbo.
cmake_minimum_required(VERSION 3.16)
project(rtc4j_native)
set(CMAKE_CXX_STANDARD 11)
if(APPLE)
set(CMAKE_CXX_FLAGS "-fno-rtti -pthread")
elseif(UNIX)
set(CMAKE_CXX_FLAGS "-fno-rtti -pthread -lva -lva-drm -lva-x11 -llzma -lX11 -lz -ldl -ltheoraenc -ltheoradec")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic")
endif()
include(./CMakeModules/FindFFMPEG.cmake)
include(./CMakeModules/FindLibJpegTurbo.cmake)
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions(WEBRTC_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_compile_definitions(WEBRTC_MAC)
endif()
find_package(LibWebRTC REQUIRED)
find_package(JNI REQUIRED)
include_directories(${JNI_INCLUDE_DIRS}) # set by find_package(JNI)
include(${LIBWEBRTC_USE_FILE})
include_directories(src ${CMAKE_CURRENT_BINARY_DIR} ${TURBO_INCLUDE_DIRS})
file(GLOB_RECURSE SRC_FILES src/*.cpp)
file(GLOB_RECURSE HDR_FILES src/*.h)
add_library(rtc SHARED ${SRC_FILES} ${HDR_FILES})
target_include_directories(rtc PRIVATE ${FFMPEG_INCLUDE_DIRS} ${TURBO_INCLUDE_DIRS})
target_link_libraries(rtc PRIVATE ${FFMPEG_LIBRARIES} ${TURBO_LIBRARIES} ${LIBWEBRTC_LIBRARIES})
FFmpeg build notes
- Enable shared builds and compile with -fPIC on Linux to avoid relocation errors.
- For NVIDIA, enable nvenc/cuvid/libnpp if the host GPU/driver supports it.
Example configure (adjust paths):
export PATH="$HOME/bin:$PATH"
export PKG_CONFIG_PATH="$HOME/ffmpeg_build/lib/pkgconfig"
./configure \
--prefix="$HOME/ffmpeg_build" \
--pkg-config-flags="--static" \
--extra-cflags="-I$HOME/ffmpeg_build/include" \
--extra-ldflags="-L$HOME/ffmpeg_build/lib" \
--extra-libs="-lpthread -lm" \
--bindir="$HOME/bin" \
--enable-gpl \
--enable-libx264 --enable-libx265 \
--enable-libopus --enable-libvorbis --enable-libvpx \
--enable-libmp3lame --enable-libfdk-aac --enable-libfreetype \
--enable-nonfree \
--enable-shared \
--cc="gcc -m64 -fPIC" \
--enable-nvenc --enable-cuda --enable-cuvid --enable-libnpp \
--extra-cflags="-I/usr/local/cuda/include" \
--extra-ldflags="-L/usr/local/cuda/lib64"
libjpeg-turbo discovery (CMake)
# FindLibJpegTurbo.cmake
find_path(TURBO_INCLUDE_DIRS turbojpeg.h /opt/libjpeg-turbo/include)
find_library(TURBO_LIBRARY NAMES turbojpeg libturbojpeg.a PATHS /opt/libjpeg-turbo/lib64)
find_library(JPEG_LIBRARY NAMES jpeg libjpeg.a PATHS /opt/libjpeg-turbo/lib64)
if(TURBO_LIBRARY AND TURBO_INCLUDE_DIRS)
set(TURBO_FOUND TRUE)
set(TURBO_LIBRARIES ${TURBO_LIBRARY} ${JPEG_LIBRARY})
message(STATUS "Found Turbo: ${TURBO_LIBRARIES} at ${TURBO_INCLUDE_DIRS}")
else()
message(STATUS "TurboJPEG not found")
endif()
Wiring up Native APIs
Threads and PeerConnectionFactory
Create the three WebRTC threads; the network thread must be created with a socket server:
void RtcBridge::InitThreads() {
signal_thread_ = rtc::Thread::Create();
signal_thread_->SetName("signal", nullptr);
RTC_CHECK(signal_thread_->Start());
work_thread_ = rtc::Thread::Create();
work_thread_->SetName("worker", nullptr);
RTC_CHECK(work_thread_->Start());
net_thread_ = rtc::Thread::CreateWithSocketServer();
net_thread_->SetName("network", nullptr);
RTC_CHECK(net_thread_->Start());
}
Initialize a custom AudioDeviceModule on the worker thread and remember to release it there:
void RtcBridge::Init(jobject j_audio, jobject j_video) {
video_capturer_global_ = j_video;
InitThreads();
adm_ = work_thread_->Invoke<rtc::scoped_refptr<webrtc::AudioDeviceModule>>(RTC_FROM_HERE, [this, j_audio] {
return rtc::scoped_refptr<webrtc::AudioDeviceModule>(
new rtc::RefCountedObject<JvmAudioDeviceModule>(
JvmAudioDeviceModule::WrapJavaCapturer(j_audio),
JvmAudioDeviceModule::CreateNullRenderer(44100))
);
});
InitFactory();
}
void RtcBridge::ReleaseAdmOnWorker() {
RTC_DCHECK(work_thread_.get() == rtc::Thread::Current());
adm_ = nullptr;
}
Create PeerConnectionFactory and the networking components used later for port scoping:
void RtcBridge::InitFactory() {
ip_whitelist_prefix_ = white_prefix_;
sock_factory_ = std::make_unique<rtc::SocketFactoryWrapper>(net_thread_.get(), ip_whitelist_prefix_, lo_port_, hi_port_);
net_manager_ = std::make_unique<rtc::BasicNetworkManager>();
pc_factory_ = webrtc::CreatePeerConnectionFactory(
net_thread_.get(), work_thread_.get(), signal_thread_.get(), adm_,
webrtc::CreateBuiltinAudioEncoderFactory(),
webrtc::CreateBuiltinAudioDecoderFactory(),
CreateCustomVideoEncoderFactory(hardware_encode_),
CreateCustomVideoDecoderFactory(),
nullptr, nullptr);
}
PeerConnection
Inject ICE config and custom PortAllocator using the restricted socket factory:
PeerLink* RtcBridge::CreatePeer(
PeerObserver* obs,
std::string stun_turn_uri,
std::string user,
std::string secret,
int max_bps) {
webrtc::PeerConnectionInterface::RTCConfiguration cfg;
webrtc::PeerConnectionInterface::IceServer srv;
srv.uri = std::move(stun_turn_uri);
srv.username = std::move(user);
srv.password = std::move(secret);
cfg.servers.push_back(srv);
cfg.tcp_candidate_policy = webrtc::PeerConnectionInterface::kTcpCandidatePolicyDisabled;
cfg.audio_jitter_buffer_fast_accelerate = true;
auto allocator = std::make_unique<cricket::BasicPortAllocator>(net_manager_.get(), sock_factory_.get());
allocator->SetPortRange(lo_port_, hi_port_);
auto pc = pc_factory_->CreatePeerConnection(cfg, std::move(allocator), nullptr, obs);
return new PeerLink(pc, obs, /*has_audio_hw*/true, max_bps);
}
void PeerLink::SetMaxBitrate(int target_bps) {
webrtc::BitrateSettings br;
br.min_bitrate_bps = 30000;
br.start_bitrate_bps = target_bps;
br.max_bitrate_bps = target_bps;
peer_->SetBitrate(br);
}
Sources and tracks
Create sources and tracks. Build the video capturer on the signaling thread.
cricket::AudioOptions PeerLink::DefaultAudioOpts() {
cricket::AudioOptions o;
o.audio_jitter_buffer_fast_accelerate = true;
o.audio_jitter_buffer_max_packets = 10;
o.echo_cancellation = false;
o.auto_gain_control = false;
o.noise_suppression = false;
o.highpass_filter = false;
o.stereo_swapping = false;
o.typing_detection = false;
o.experimental_agc = false;
o.extended_filter_aec = false;
o.delay_agnostic_aec = false;
o.experimental_ns = false;
o.residual_echo_detector = false;
o.audio_network_adaptor = true;
return o;
}
rtc::scoped_refptr<webrtc::AudioSourceInterface> RtcBridge::MakeAudioSource(const cricket::AudioOptions& opt) {
return pc_factory_->CreateAudioSource(opt);
}
JvmVideoCapturer* RtcBridge::MakeVideoCapturerOnSignal() {
if (!video_capturer_global_) return nullptr;
return signal_thread_->Invoke<JvmVideoCapturer*>(RTC_FROM_HERE, [this] {
return CreateJvmVideoCapturer(video_capturer_global_);
});
}
// usage
auto audio_src = rtc->MakeAudioSource(peer->DefaultAudioOpts());
auto vid_src = rtc->CreateVideoSource(rtc->MakeVideoCapturerOnSignal());
auto vtrack = rtc->CreateVideoTrack("v0", vid_src.get());
auto atrack = rtc->CreateAudioTrack("a0", audio_src);
MediaStream and add to PeerConnection
auto stream = rtc->CreateLocalMediaStream("m");
stream->AddTrack(vtrack);
stream->AddTrack(atrack);
peer_->AddStream(stream);
Data channel
Create the local endpoint and wire an observer. The remote endpoint appears via PeerConnection::OnDataChannel. JNI threads must be attached when invoking Java from native callbacks.
DataPipe* PeerLink::OpenDataChannel(std::string label,
webrtc::DataChannelInit opts,
DataPipeObserver* obs) {
auto dc = peer_->CreateDataChannel(label, &opts);
dc->RegisterObserver(obs);
return new DataPipe(dc, obs);
}
void DataPipe::Send(webrtc::DataBuffer& buf) {
chan_->Send(buf);
}
void DataPipeObserverImpl::OnMessage(const webrtc::DataBuffer& buf) {
JNIEnv* env = AttachCurrentThread();
jbyteArray payload = env->NewByteArray(static_cast<jsize>(buf.data.size()));
env->SetByteArrayRegion(payload, 0, static_cast<jsize>(buf.data.size()),
reinterpret_cast<const jbyte*>(buf.data.cdata()));
jclass cls = env->GetObjectClass(java_observer_);
jmethodID mid = env->GetMethodID(cls, "onMessage", "([BZ)V");
env->CallVoidMethod(java_observer_, mid, payload, (jboolean)buf.binary);
env->DeleteLocalRef(payload);
env->DeleteLocalRef(cls);
}
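On the Java side, the observer only needs the method that the JNI lookup above targets, onMessage with signature ([BZ)V:
public interface DataPipeObserver {
    // Called from a native thread attached via AttachCurrentThread() (below).
    void onMessage(byte[] payload, boolean binary);
}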
JNIEnv* AttachCurrentThread() {
void* e = nullptr;
if (g_vm->GetEnv(&e, JNI_VERSION_1_8) == JNI_OK) return reinterpret_cast<JNIEnv*>(e);
JavaVMAttachArgs a{JNI_VERSION_1_8, const_cast<char*>("rtc-thread"), nullptr};
JNIEnv* env = nullptr;
RTC_CHECK(g_vm->AttachCurrentThread(reinterpret_cast<void**>(&env), &a) == JNI_OK);
return env;
}
void DetachCurrentThreadIfNeeded() {
void* e = nullptr;
if (g_vm->GetEnv(&e, JNI_VERSION_1_8) == JNI_EDETACHED) return;
g_vm->DetachCurrentThread();
}
Offer/answer and descriptions
Offer/answer options exclude receiving media when this side only publishes:
void PeerLink::CreateOffer(jobject j_obs) {
create_sdp_obs_->Bind(j_obs, "offer");
webrtc::PeerConnectionInterface::RTCOfferAnswerOptions o;
o.offer_to_receive_audio = false;
o.offer_to_receive_video = false;
peer_->CreateOffer(create_sdp_obs_, o);
}
void PeerLink::CreateAnswer(jobject j_obs) {
create_sdp_obs_->Bind(j_obs, "answer");
webrtc::PeerConnectionInterface::RTCOfferAnswerOptions o;
o.offer_to_receive_audio = false;
o.offer_to_receive_video = false;
peer_->CreateAnswer(create_sdp_obs_, o);
}
webrtc::SdpParseError PeerLink::SetLocal(JNIEnv* env, jobject j_sdp) {
  webrtc::SdpParseError err;
  auto desc = webrtc::CreateSessionDescription(
      GetString(env, j_sdp, "type"), GetString(env, j_sdp, "sdp"), &err);
  if (desc) peer_->SetLocalDescription(set_sdp_obs_, desc);  // desc is null on parse failure
  return err;
}
webrtc::SdpParseError PeerLink::SetRemote(JNIEnv* env, jobject j_sdp) {
  webrtc::SdpParseError err;
  auto desc = webrtc::CreateSessionDescription(
      GetString(env, j_sdp, "type"), GetString(env, j_sdp, "sdp"), &err);
  if (desc) peer_->SetRemoteDescription(set_sdp_obs_, desc);
  return err;
}
Java-side signaling sketch:
peer.createOffer(sdp -> executor.submit(() -> {
peer.setLocalDescription(sdp);
sendToRemote(sdp);
}));
// on remote SDP answer
peer.setRemoteDescription(remoteSdp);
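ICE candidates travel the same way: the native PeerConnection observer surfaces local candidates to Java for relaying, and remote ones are fed back in. A sketch (listener and method names are illustrative):
peer.onIceCandidate(candidate -> executor.submit(() -> sendToRemote(candidate)));
// on remote ICE candidate received from signaling
peer.addIceCandidate(remoteCandidate);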
Resource teardown
Java:
public void disposeAll() {
lock.lock();
try {
if (remoteData != null) { remoteData.close(); remoteData = null; }
if (localData != null) { localData.close(); localData = null; }
if (peer != null) { peer.close(); peer = null; }
if (rtc != null) { rtc.close(); }
destroyed = true;
} finally {
lock.unlock();
}
}
C++:
DataPipe::~DataPipe() {
chan_->UnregisterObserver();
delete observer_;
chan_->Close();
chan_ = nullptr;
}
PeerLink::~PeerLink() {
peer_->Close();
peer_ = nullptr;
delete obs_;
delete set_sdp_obs_;
delete create_sdp_obs_;
}
RtcBridge::~RtcBridge() {
pc_factory_ = nullptr;
work_thread_->Invoke<void>(RTC_FROM_HERE, [this]{ ReleaseAdmOnWorker(); });
signal_thread_->Invoke<void>(RTC_FROM_HERE, [this]{ DetachCurrentThreadIfNeeded(); });
work_thread_->Invoke<void>(RTC_FROM_HERE, [this]{ DetachCurrentThreadIfNeeded(); });
net_thread_->Invoke<void>(RTC_FROM_HERE, [this]{ DetachCurrentThreadIfNeeded(); });
work_thread_->Stop(); signal_thread_->Stop(); net_thread_->Stop();
work_thread_.reset(); signal_thread_.reset(); net_thread_.reset();
net_manager_ = nullptr; sock_factory_ = nullptr;
if (video_capturer_global_) {
JNIEnv* env = AttachCurrentThread();
env->DeleteGlobalRef(video_capturer_global_);
video_capturer_global_ = nullptr;
}
}
Custom audio capture from Java
AudioDeviceModule is the hook used by WebRTC to push/pull PCM. Implemented methods include device enumeration, initialization, start/stop for record/playout, and RegisterAudioCallback to hand captured frames upward.
Define thin capture/render interfaces backing the ADM:
class AudioSource {
public:
virtual ~AudioSource() = default;
virtual int SampleHz() = 0;
virtual bool Read10ms(rtc::BufferT<int16_t>* out) = 0;
virtual bool IsJavaBacked() { return false; }
};
class AudioSink {
public:
virtual ~AudioSink() = default;
virtual int SampleHz() const = 0;
virtual bool Write(rtc::ArrayView<const int16_t> data) = 0;
};
Java-backed capturer using direct ByteBuffer:
class JavaPcmSource final : public AudioSource {
public:
explicit JavaPcmSource(jobject j_src)
    : j_src_(j_src) {
  JNIEnv* env = AttachCurrentThread();
  // Promote the class to a global ref; a local ref must not be cached across JNI calls.
  cls_ = static_cast<jclass>(env->NewGlobalRef(env->GetObjectClass(j_src_)));
  mid_rate_ = env->GetMethodID(cls_, "samplingFrequency", "()I");
  mid_cap_ = env->GetMethodID(cls_, "capture", "(I)Ljava/nio/ByteBuffer;");
}
~JavaPcmSource() override {
  JNIEnv* env = AttachCurrentThread();
  if (cls_) env->DeleteGlobalRef(cls_);
  if (j_src_) env->DeleteGlobalRef(j_src_);
}
bool IsJavaBacked() override { return true; }
int SampleHz() override {
if (!hz_) {
JNIEnv* env = AttachCurrentThread();
hz_ = env->CallIntMethod(j_src_, mid_rate_);
}
return hz_;
}
bool Read10ms(rtc::BufferT<int16_t>* out) override {
  out->SetData(SamplesPerFrame(SampleHz()), [&](rtc::ArrayView<int16_t> dst) {
    JNIEnv* env = AttachCurrentThread();
    jobject buf = env->CallObjectMethod(j_src_, mid_cap_, static_cast<jint>(dst.size() * 2));
    auto* addr = static_cast<int8_t*>(env->GetDirectBufferAddress(buf));
    jlong cap = env->GetDirectBufferCapacity(buf);
    // Copy at most what the destination holds, even if Java returned a larger buffer.
    size_t n = std::min(static_cast<size_t>(cap) / 2, dst.size());
    memcpy(dst.data(), addr, n * 2);
    env->DeleteLocalRef(buf);
    return n;
  });
  return out->size() == out->capacity();
}
private:
jobject j_src_{}; jclass cls_{}; jmethodID mid_rate_{}, mid_cap_{}; int hz_{};
};
constexpr int kFrameMs = 10;
constexpr int kFps = 1000 / kFrameMs;
inline size_t SamplesPerFrame(int hz) { return rtc::CheckedDivExact(hz, kFps); }
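The Java capturer behind this only has to honor the two methods resolved in the constructor, returning a direct ByteBuffer sized to the requested byte count (the native side derives the sample count from the buffer's capacity). A sketch; the PCM source is an assumption:
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class PcmAudioCapturer {
    private static final int HZ = 44100;
    // 10 ms of 16-bit mono PCM at 44.1 kHz = 441 samples = 882 bytes.
    private final ByteBuffer buffer = ByteBuffer.allocateDirect(882).order(ByteOrder.nativeOrder());

    public int samplingFrequency() { return HZ; }

    // size is the byte count requested by the native side; the buffer must be direct.
    public ByteBuffer capture(int size) {
        buffer.clear();
        readPcm(buffer, size); // application-specific PCM source (assumed)
        return buffer;
    }

    private void readPcm(ByteBuffer dst, int size) { /* fill with captured PCM */ }
}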
Minimal sink discarding playout:
class NullSink final : public AudioSink {
public:
explicit NullSink(int hz): hz_(hz) {}
int SampleHz() const override { return hz_; }
bool Write(rtc::ArrayView<const int16_t>) override { return true; }
private: int hz_;
};
AudioDeviceModule skeleton with periodic tick using WebRTC’s cross-platform primitives:
class JvmAudioDeviceModule : public webrtc::AudioDeviceModule {
public:
JvmAudioDeviceModule(std::unique_ptr<AudioSource> src,
std::unique_ptr<AudioSink> sink,
float speed)
: src_(std::move(src)), sink_(std::move(sink)), speed_(speed),
tick_(webrtc::EventTimerWrapper::Create()),
thread_(&JvmAudioDeviceModule::Run, this, "JvmADM") {}
int32_t Init() override {
RTC_CHECK(tick_->StartTimer(true, kFrameMs / speed_));
thread_.Start();
thread_.SetPriority(rtc::kHighPriority);
return 0;
}
int32_t RegisterAudioCallback(webrtc::AudioTransport* cb) override {
rtc::CritScope cs(&lock_);
cb_ = cb; return 0;
}
int32_t StartRecording() override {
rtc::CritScope cs(&lock_);
capturing_ = true; done_capture_.Reset();
return 0;
}
int32_t StopRecording() override {
rtc::CritScope cs(&lock_);
capturing_ = false; done_capture_.Set();
return 0;
}
int32_t StartPlayout() override {
rtc::CritScope cs(&lock_);
playing_ = true; done_play_.Reset();
return 0;
}
int32_t StopPlayout() override {
rtc::CritScope cs(&lock_);
playing_ = false; done_play_.Set();
return 0;
}
~JvmAudioDeviceModule() override {
StopPlayout(); StopRecording();
need_detach_ = true;
while (!detached_) {}
thread_.Stop();
}
private:
static bool Run(void* p) { static_cast<JvmAudioDeviceModule*>(p)->Pump(); return true; }
void Pump() {
{
rtc::CritScope cs(&lock_);
if (capturing_ && cb_) {
const bool ok = src_->Read10ms(&rec_buf_);
uint32_t mic_level = 0;  // new-mic-level out-param of RecordedDataIsAvailable
if (ok) {
cb_->RecordedDataIsAvailable(rec_buf_.data(), rec_buf_.size(), 2, 1,
src_->SampleHz(), 0, 0, 0, false, mic_level);
}
if (!ok) { capturing_ = false; done_capture_.Set(); }
}
if (playing_ && cb_ && sink_) {
  const size_t want = SamplesPerFrame(sink_->SampleHz());
  play_buf_.SetSize(want);  // make room for a full 10 ms frame before the callback fills it
  size_t samples_out; int64_t et, ntp;
  cb_->NeedMorePlayData(want, 2, 1, sink_->SampleHz(),
                        play_buf_.data(), samples_out, &et, &ntp);
  if (!sink_->Write(rtc::ArrayView<const int16_t>(play_buf_.data(), samples_out))) {
    playing_ = false; done_play_.Set();
  }
}
if (src_->IsJavaBacked() && need_detach_ && !detached_) {
DetachCurrentThreadIfNeeded();
detached_ = true;
}
}
tick_->Wait(WEBRTC_EVENT_INFINITE);
}
rtc::CriticalSection lock_;
webrtc::AudioTransport* cb_{};
std::unique_ptr<AudioSource> src_;
std::unique_ptr<AudioSink> sink_;
bool capturing_{}; bool playing_{};
rtc::Event done_capture_{true, true};
rtc::Event done_play_{true, true};
std::unique_ptr<webrtc::EventTimerWrapper> tick_;
rtc::PlatformThread thread_;
rtc::BufferT<int16_t> rec_buf_;
rtc::BufferT<int16_t> play_buf_;
float speed_{}; bool need_detach_{}; bool detached_{};
};
Custom video capture from Java
Create a VideoCapturer that pulls JPEG frames from Java and converts them to I420 with libjpeg-turbo. Build it on the signaling thread when creating the VideoSource.
auto vsrc = rtc->CreateVideoSource(rtc->MakeVideoCapturerOnSignal());
Capturer core:
class JvmVideoCapturer : public cricket::VideoCapturer {
public:
explicit JvmVideoCapturer(jobject j_cap)
    : j_cap_(j_cap), tick_(webrtc::EventTimerWrapper::Create()),
      thread_(&JvmVideoCapturer::Run, this, "JvmVideo") {
  JNIEnv* env = AttachCurrentThread();
  // Global ref: the class is used again from the capture thread.
  cls_ = static_cast<jclass>(env->NewGlobalRef(env->GetObjectClass(j_cap_)));
  mid_w_ = env->GetMethodID(cls_, "getWidth", "()I");
  mid_h_ = env->GetMethodID(cls_, "getHeight", "()I");
  mid_f_ = env->GetMethodID(cls_, "getFps", "()I");
  mid_cap_ = env->GetMethodID(cls_, "capture", "()Lpkg/VideoFrame;");
  width_ = env->CallIntMethod(j_cap_, mid_w_);
  height_ = env->CallIntMethod(j_cap_, mid_h_);
  fps_ = env->CallIntMethod(j_cap_, mid_f_);
  // Per-instance format list; a function-local static would freeze the first instance's geometry.
  std::vector<cricket::VideoFormat> formats{
      {width_, height_, cricket::VideoFormat::FpsToInterval(fps_), cricket::FOURCC_I420}};
  SetSupportedFormats(formats);
  RTC_CHECK(tick_->StartTimer(true, rtc::kNumMillisecsPerSec / fps_));
  thread_.Start(); thread_.SetPriority(rtc::kHighPriority);
  tj_ = tjInitDecompress();
}
~JvmVideoCapturer() override {
  thread_.Stop(); SignalDestroyed(this);
  JNIEnv* env = AttachCurrentThread();
  if (cls_) env->DeleteGlobalRef(cls_);
  if (tj_) tjDestroy(tj_);
}
cricket::CaptureState Start(const cricket::VideoFormat& f) override {
running_ = true; SetCaptureState(cricket::CS_RUNNING); return cricket::CS_RUNNING;
}
void Stop() override { running_ = false; SetCaptureState(cricket::CS_STOPPED); }
bool GetPreferredFourccs(std::vector<uint32_t>* v) override { v->push_back(cricket::FOURCC_I420); return true; }
private:
static bool Run(void* p) { static_cast<JvmVideoCapturer*>(p)->Tick(); return true; }
void Tick() {
{
rtc::CritScope cs(&lock_);
if (!running_) { tick_->Wait(WEBRTC_EVENT_INFINITE); return; }
int64_t ts = rtc::TimeMicros();
JNIEnv* env = AttachCurrentThread();
jobject j_frame = env->CallObjectMethod(j_cap_, mid_cap_);
if (!j_frame) {
auto i420 = webrtc::I420Buffer::Create(width_, height_);
webrtc::I420Buffer::SetBlack(i420);
OnFrame(webrtc::VideoFrame(i420, webrtc::kVideoRotation_0, ts), width_, height_);
tick_->Wait(WEBRTC_EVENT_INFINITE); return;
}
jobject j_buf = env->CallObjectMethod(j_frame, GetBufferGetter());
auto* data = static_cast<unsigned char*>(env->GetDirectBufferAddress(j_buf));
auto len = static_cast<unsigned long>(env->CallIntMethod(j_frame, GetLengthGetter()));
int rotation = env->CallIntMethod(j_frame, GetRotationGetter());
int w, h; tjDecompressHeader(tj_, data, len, &w, &h);
// Round strides up to a multiple of 32 for libjpeg-turbo's SIMD paths.
auto align32 = [](int v) { return (v + 31) & ~31; };
auto i420 = webrtc::I420Buffer::Create(w, h, align32(w), align32(w / 2), align32(w / 2));
uint8_t* planes[] = { i420->MutableDataY(), i420->MutableDataU(), i420->MutableDataV() };
int strides[] = { i420->StrideY(), i420->StrideU(), i420->StrideV() };
tjDecompressToYUVPlanes(tj_, data, len, planes, w, strides, h, TJFLAG_FASTDCT | TJFLAG_NOREALLOC);
env->DeleteLocalRef(j_buf); env->DeleteLocalRef(j_frame);
OnFrame(webrtc::VideoFrame(i420, static_cast<webrtc::VideoRotation>(rotation), ts), w, h);
}
tick_->Wait(WEBRTC_EVENT_INFINITE);
}
rtc::CriticalSection lock_;
bool running_{}; int width_{}, height_{}, fps_{};
std::unique_ptr<webrtc::EventTimerWrapper> tick_;
rtc::PlatformThread thread_;
jobject j_cap_{}; jclass cls_{}; tjhandle tj_{};
jmethodID mid_w_{}, mid_h_{}, mid_f_{}, mid_cap_{};
};
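The Java contract mirrors the method IDs resolved in the capturer's constructor: fixed geometry plus a capture() returning a frame whose JPEG bytes sit in a direct ByteBuffer. A sketch inferred from those lookups (pkg/VideoFrame stands in for the real package; each type lives in its own file):
import java.nio.ByteBuffer;

public class VideoFrame {
    private final ByteBuffer buffer; // direct buffer holding one complete JPEG
    private final int length;        // valid bytes within the buffer
    private final int rotation;      // 0, 90, 180, or 270

    public VideoFrame(ByteBuffer buffer, int length, int rotation) {
        this.buffer = buffer; this.length = length; this.rotation = rotation;
    }
    public ByteBuffer getBuffer() { return buffer; }
    public int getLength() { return length; }
    public int getRotation() { return rotation; }
}

public interface VideoCapturer {
    int getWidth();
    int getHeight();
    int getFps();
    VideoFrame capture(); // may return null; the native side then emits a black frame
}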
Port range and IP restrictions
The PortAllocator range setter alone may not constrain all sockets. Wrap the packet socket factory and enforce allowed port ranges and private IP allowlists.
rtc::AsyncPacketSocket* SocketFactoryWrapper::CreateUdpSocket(
const rtc::SocketAddress& local, uint16_t lo, uint16_t hi) {
if (lo < lo_port_ || hi > hi_port_) {
return nullptr;
}
const bool ok_ip = (!local.IsPrivateIP()) || local.HostAsURIString().rfind(whitelist_prefix_, 0) == 0;
if (!ok_ip) return nullptr;
auto* s = BasicPacketSocketFactory::CreateUdpSocket(local, lo, hi);
return s;
}
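From Java, the allowed range and address prefix are plain configuration handed to the bridge; illustrative:
// Restrict ICE to UDP ports 50000-50100 and private addresses starting with "192.168."
RtcBridge rtc = new RtcBridge(audioCapturer, videoCapturer, "192.168.", 50000, 50100);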
Prefer H.264 over VP8 at negotiation
Advertise only H.264 with packetization-mode=1 (non-interleaved):
std::vector<webrtc::SdpVideoFormat> SupportedFormats() const override {
std::vector<webrtc::SdpVideoFormat> v;
auto id = webrtc::H264::ProfileLevelId(webrtc::H264::kProfileBaseline, webrtc::H264::kLevel3_1);
auto s = webrtc::H264::ProfileLevelIdToString(id);
v.emplace_back(cricket::kH264CodecName,
{{cricket::kH264FmtpProfileLevelId, *s},
{cricket::kH264FmtpLevelAsymmetryAllowed, "1"},
{cricket::kH264FmtpPacketizationMode, "1"}});
return v;
}
FFmpeg-based H.264 encoder (libx264/h264_nvenc)
Implement WebRTC’s VideoEncoder interface with FFmpeg, supporting simulcast layers and runtime bitrate changes.
Encoder context per layer:
struct AvEncoder {
  AVCodec* codec = nullptr;
  AVCodecContext* ctx = nullptr;
  AVFrame* frame = nullptr;
  AVPacket* pkt = nullptr;
};
Members:
std::vector<AvEncoder*> encoders_;
std::vector<LayerConfig> cfgs_;
std::vector<webrtc::EncodedImage> images_;
std::vector<std::unique_ptr<uint8_t[]>> image_bufs_;
webrtc::VideoCodec codec_;
webrtc::H264PacketizationMode mode_ { webrtc::H264PacketizationMode::NonInterleaved }; // matches packetization-mode=1 advertised above
webrtc::EncodedImageCallback* cb_{};
int32_t cores_{}; size_t max_payload_{}; bool hw_{};
Initialization:
int32_t InitEncode(const webrtc::VideoCodec* inst, int32_t cores, size_t max_payload) override {
if (!inst || inst->codecType != webrtc::kVideoCodecH264 || !inst->maxFramerate || inst->width < 1 || inst->height < 1)
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
Release();
int streams = webrtc::SimulcastUtility::NumberOfSimulcastStreams(*inst);
if (streams > 1 && (!webrtc::SimulcastUtility::ValidSimulcastResolutions(*inst, streams) ||
!webrtc::SimulcastUtility::ValidSimulcastTemporalLayers(*inst, streams)))
return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED;
images_.resize(streams);
image_bufs_.resize(streams);
encoders_.resize(streams);
cfgs_.resize(streams);
for (int i = 0; i < streams; ++i) encoders_[i] = new AvEncoder();
cores_ = cores; max_payload_ = max_payload; codec_ = *inst;
if (!codec_.numberOfSimulcastStreams) {
codec_.simulcastStream[0].width = codec_.width;
codec_.simulcastStream[0].height = codec_.height;
}
for (int i = 0, idx = streams - 1; i < streams; ++i, --idx) {
if (inst->simulcastStream[i].numberOfTemporalLayers > 1) return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED;
auto& c = cfgs_[i];
c.simulcast_idx = idx;
c.sending = false;
c.width = codec_.simulcastStream[idx].width;
c.height = codec_.simulcastStream[idx].height;
c.max_frame_rate = static_cast<float>(codec_.maxFramerate);
c.frame_dropping_on = codec_.H264()->frameDroppingOn;
c.key_frame_interval = codec_.H264()->keyFrameInterval;
c.max_bps = codec_.maxBitrate * 1000;
c.target_bps = codec_.startBitrate * 1000;
if (!OpenEncoder(encoders_[i], c)) return WEBRTC_VIDEO_CODEC_ERROR;
images_[i]._size = CalcBufferSize(webrtc::VideoType::kI420, c.width, c.height);
images_[i]._buffer = new uint8_t[images_[i]._size];
image_bufs_[i].reset(images_[i]._buffer);
images_[i]._completeFrame = true;
images_[i]._encodedWidth = c.width;
images_[i]._encodedHeight = c.height;
images_[i]._length = 0;
}
webrtc::SimulcastRateAllocator alloc(codec_);
return SetRateAllocation(alloc.GetAllocation(codec_.startBitrate * 1000, codec_.maxFramerate), codec_.maxFramerate);
}
Opening the FFmpeg encoder:
bool OpenEncoder(AvEncoder* e, LayerConfig& c) {
#ifdef WEBRTC_LINUX
  if (hw_) e->codec = avcodec_find_encoder_by_name("h264_nvenc");
#endif
  if (!e->codec) e->codec = avcodec_find_encoder_by_name("libx264");
  if (!e->codec) return false;
  e->ctx = avcodec_alloc_context3(e->codec);
  if (!e->ctx) return false;
  c.target_bps = c.max_bps;
  // Configure the context fully before avcodec_open2(); opening a video
  // encoder with an unset pix_fmt fails.
  e->ctx->pix_fmt = AV_PIX_FMT_YUV420P;
  e->ctx->time_base = AVRational{1, 25};
  e->ctx->gop_size = c.key_frame_interval;
  e->ctx->max_b_frames = 0;
  e->ctx->codec_type = AVMEDIA_TYPE_VIDEO;
  e->ctx->codec_id = AV_CODEC_ID_H264;
  SetContext(e, c, /*init=*/true);  // sets width/height and rate control
  if (std::string(e->codec->name) == "libx264") {
    av_opt_set(e->ctx->priv_data, "preset", "ultrafast", 0);
    av_opt_set(e->ctx->priv_data, "tune", "zerolatency", 0);
  }
  if (avcodec_open2(e->ctx, e->codec, nullptr) < 0) { avcodec_free_context(&e->ctx); return false; }
  e->frame = av_frame_alloc();
  if (!e->frame) return false;
  e->frame->format = e->ctx->pix_fmt;
  e->frame->width = c.width;
  e->frame->height = c.height;
  if (av_image_alloc(e->frame->data, e->frame->linesize, c.width, c.height, e->ctx->pix_fmt, 32) < 0)
    return false;
  e->frame->pts = 1;
  e->pkt = av_packet_alloc();
  return true;
}
Dynamic bitrate via FFmpeg context updates:
void SetContext(AvEncoder* e, LayerConfig& c, bool init) {
c.key_frame_request = true;
e->ctx->width = c.width;
e->ctx->height = c.height;
e->ctx->bit_rate = static_cast<int64_t>(c.target_bps * 0.7);
e->ctx->rc_max_rate = static_cast<int64_t>(c.target_bps * 0.85);
e->ctx->rc_min_rate = static_cast<int64_t>(c.target_bps * 0.10);
e->ctx->rc_buffer_size = static_cast<int>(c.target_bps * 2);
#ifdef WEBRTC_LINUX
if (std::string(e->codec->name) == "h264_nvenc") {
auto* nv = reinterpret_cast<NvencContext*>(e->ctx->priv_data);
nv->encode_config.rcParams.averageBitRate = e->ctx->bit_rate;
nv->encode_config.rcParams.maxBitRate = e->ctx->rc_max_rate;
}
#endif
}
Register callback and handle allocator rate updates:
int32_t RegisterEncodeCompleteCallback(webrtc::EncodedImageCallback* cb) override { cb_ = cb; return WEBRTC_VIDEO_CODEC_OK; }
int32_t SetRateAllocation(const webrtc::BitrateAllocation& br, uint32_t fps) override {
if (encoders_.empty()) return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
if (fps < 1) return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
if (br.get_sum_bps() == 0) { for (auto& c : cfgs_) c.SetStreamState(false); return WEBRTC_VIDEO_CODEC_OK; }
codec_.maxFramerate = fps;
size_t sidx = encoders_.size() - 1;
for (size_t i = 0; i < encoders_.size(); ++i, --sidx) {
cfgs_[i].target_bps = br.GetSpatialLayerSum(sidx);
cfgs_[i].max_frame_rate = static_cast<float>(fps);
if (cfgs_[i].target_bps) { cfgs_[i].SetStreamState(true); SetContext(encoders_[i], cfgs_[i], false); }
else cfgs_[i].SetStreamState(false);
}
return WEBRTC_VIDEO_CODEC_OK;
}
Encode path and NALU fragmentation for RTP:
int32_t Encode(const webrtc::VideoFrame& frame,
const webrtc::CodecSpecificInfo* info,
const std::vector<webrtc::FrameType>* types) override {
if (encoders_.empty() || !cb_) return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
auto i420 = frame.video_frame_buffer()->ToI420();  // don't assume the buffer is already I420
bool force_key = false;
for (auto& c : cfgs_) if (c.key_frame_request && c.sending) { force_key = true; break; }
if (!force_key && types) {
for (size_t i = 0; i < types->size() && i < cfgs_.size(); ++i)
if ((*types)[i] == webrtc::kVideoFrameKey && cfgs_[i].sending) { force_key = true; break; }
}
for (size_t i = 0; i < encoders_.size(); ++i) {
if (!cfgs_[i].sending) continue;
FillAvFrame(encoders_[i]->frame, i420); // copy planes, stride aware
encoders_[i]->frame->key_frame = force_key ? 1 : 0;
encoders_[i]->frame->pict_type = force_key ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
cfgs_[i].key_frame_request = false;
int rc = avcodec_send_frame(encoders_[i]->ctx, encoders_[i]->frame);
if (rc != 0) return WEBRTC_VIDEO_CODEC_ERROR;
encoders_[i]->frame->pts++;
while (rc >= 0) {
rc = avcodec_receive_packet(encoders_[i]->ctx, encoders_[i]->pkt);
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) break;
if (rc < 0) return WEBRTC_VIDEO_CODEC_ERROR;
auto& out = images_[i];
out._encodedWidth = cfgs_[i].width;
out._encodedHeight = cfgs_[i].height;
out.SetTimestamp(frame.timestamp());
out.ntp_time_ms_ = frame.ntp_time_ms();
out.capture_time_ms_ = frame.render_time_ms();
out.rotation_ = frame.rotation();
out.content_type_ = (codec_.mode == webrtc::VideoCodecMode::kScreensharing)
? webrtc::VideoContentType::SCREENSHARE : webrtc::VideoContentType::UNSPECIFIED;
out.timing_.flags = webrtc::VideoSendTiming::kInvalid;
out._frameType = force_key ? webrtc::kVideoFrameKey : webrtc::kVideoFrameDelta;
webrtc::RTPFragmentationHeader frag;
FragmentAnnexB(encoders_[i]->pkt, i420->width(), i420->height(), &out, &image_bufs_[i], &frag);
av_packet_unref(encoders_[i]->pkt);
if (out._length == 0) continue;
webrtc::CodecSpecificInfo cs; cs.codecType = webrtc::kVideoCodecH264;
cs.codecSpecific.H264.packetization_mode = mode_;
cs.codecSpecific.H264.simulcast_idx = static_cast<uint8_t>(cfgs_[i].simulcast_idx);
cb_->OnEncodedImage(out, &cs, &frag);
}
}
return WEBRTC_VIDEO_CODEC_OK;
}
Annex‑B NAL parsing and RTP fragmentation header generation (handles 0x000001 and 0x00000001):
static constexpr uint8_t kStartCode[4] = {0,0,0,1};
void FragmentAnnexB(AVPacket* p, int w, int h,
webrtc::EncodedImage* out,
std::unique_ptr<uint8_t[]>* buf,
webrtc::RTPFragmentationHeader* frag) {
std::vector<int> starts, lens;
for (int i = 2; i < p->size; ++i) {
bool four = i > 2 && p->data[i-3]==0 && p->data[i-2]==0 && p->data[i-1]==0 && p->data[i]==1;
bool three= p->data[i-2]==0 && p->data[i-1]==0 && p->data[i]==1;
if (four) { if (!starts.empty()) lens.push_back(i-3 - starts.back()); starts.push_back(i+1); }
else if (three) { if (!starts.empty()) lens.push_back(i-2 - starts.back()); starts.push_back(i+1); }
}
if (!starts.empty()) lens.push_back(p->size - starts.back());
int payload = 0; for (int L : lens) payload += L;
size_t need = payload + starts.size()*4;
if (out->_size < need) {
out->_size = CalcBufferSize(webrtc::VideoType::kI420, w, h);
if (out->_size < need) out->_size = need;
out->_buffer = new uint8_t[out->_size];
buf->reset(out->_buffer);
}
out->_length = 0; frag->VerifyAndAllocateFragmentationHeader(starts.size());
for (size_t i = 0; i < starts.size(); ++i) {
memcpy(out->_buffer + out->_length, kStartCode, sizeof(kStartCode));
out->_length += sizeof(kStartCode);
frag->fragmentationOffset[i] = out->_length;
memcpy(out->_buffer + out->_length, p->data + starts[i], static_cast<size_t>(lens[i]));
out->_length += lens[i];
frag->fragmentationLength[i] = static_cast<size_t>(lens[i]);
}
}
Release:
int32_t Release() override {
while (!encoders_.empty()) { auto* e = encoders_.back(); encoders_.pop_back(); Close(e); }
cfgs_.clear(); images_.clear(); image_bufs_.clear();
return WEBRTC_VIDEO_CODEC_OK;
}
void Close(AvEncoder* e) {
if (!e) return;
if (e->ctx) { avcodec_close(e->ctx); avcodec_free_context(&e->ctx); }
if (e->frame) av_frame_free(&e->frame);
if (e->pkt) av_packet_free(&e->pkt);
delete e;
}