ZYNQ Stream-to-Memory DMA with GPIO-Triggered Data Generation
System Architecture
The design implements a PL-based data generator that transfers sequential counter values to PS DDR via AXI DMA. A GPIO signal from the Processing System triggers the transmission, enabling precise software control over acquisition timing. The data flow originates from a custom RTL core, passes through the AXI DMA S2MM channel, and terminates in system memory, with completion signaled via interrupts.
Hardware Implementation
Stream Generator Core
The RTL module produces a burst of incrementing values upon receiving a rising-edge trigger. It implements a minimal AXI4-Stream master interface with parameterized transfer length.
// axis_counter_core
//
// AXI4-Stream master that emits one packet of PACKET_WORDS incrementing
// words (0, 1, ..., PACKET_WORDS-1) for every rising edge seen on
// gpio_trigger. TLAST accompanies the final word and TKEEP is tied high.
//
// Parameters:
//   DATA_WIDTH   - width of m_axis_tdata in bits (the counter value is
//                  zero-extended or truncated to this width).
//   PACKET_WORDS - number of beats per packet.
//
// Handshake behaviour:
//   * TVALID is raised as soon as a trigger edge is detected; the core does
//     not wait for TREADY first, so a trigger that arrives while the slave
//     is stalled is held rather than lost.
//   * TDATA/TLAST only advance on a beat that was actually accepted
//     (TVALID && TREADY), so no words are dropped under backpressure.
//   * Trigger edges that arrive while a packet is in flight are ignored.
module axis_counter_core #(
    parameter integer DATA_WIDTH   = 32,
    parameter integer PACKET_WORDS = 512
)(
    input  wire                      aclk,
    input  wire                      aresetn,
    input  wire                      gpio_trigger,
    output reg                       m_axis_tvalid,
    output reg  [DATA_WIDTH-1:0]     m_axis_tdata,
    output reg                       m_axis_tlast,
    input  wire                      m_axis_tready,
    output wire [(DATA_WIDTH/8)-1:0] m_axis_tkeep
);

    // $clog2(1) is 0, which would give a zero-width counter; force >= 1 bit.
    localparam integer CNT_WIDTH = (PACKET_WORDS > 1) ? $clog2(PACKET_WORDS) : 1;

    localparam [1:0] ST_IDLE = 2'b00;
    localparam [1:0] ST_SEND = 2'b01;
    localparam [1:0] ST_DONE = 2'b10;

    // Every byte lane is always valid.
    assign m_axis_tkeep = {(DATA_WIDTH/8){1'b1}};

    // ------------------------------------------------------------------
    // Trigger conditioning: two-flop synchronizer followed by a one-cycle
    // delay used for rising-edge detection. The edge detector only looks
    // at flops that are already past the first (potentially metastable)
    // stage.
    // ------------------------------------------------------------------
    reg  trigger_meta;
    reg  trigger_sync;
    reg  trigger_prev;
    wire trigger_edge;

    always @(posedge aclk or negedge aresetn) begin
        if (!aresetn) begin
            trigger_meta <= 1'b0;
            trigger_sync <= 1'b0;
            trigger_prev <= 1'b0;
        end else begin
            trigger_meta <= gpio_trigger;
            trigger_sync <= trigger_meta;
            trigger_prev <= trigger_sync;
        end
    end

    assign trigger_edge = trigger_sync & ~trigger_prev;

    // ------------------------------------------------------------------
    // Packet generator. word_cnt always equals the index of the word that
    // is currently presented on m_axis_tdata.
    // ------------------------------------------------------------------
    reg  [1:0]           state;
    reg  [CNT_WIDTH-1:0] word_cnt;
    wire [CNT_WIDTH-1:0] next_cnt = word_cnt + 1'b1;
    wire                 beat_accepted = m_axis_tvalid & m_axis_tready;

    always @(posedge aclk or negedge aresetn) begin
        if (!aresetn) begin
            state         <= ST_IDLE;
            m_axis_tvalid <= 1'b0;
            m_axis_tdata  <= {DATA_WIDTH{1'b0}};
            m_axis_tlast  <= 1'b0;
            word_cnt      <= 'd0;
        end else begin
            case (state)
                ST_IDLE: begin
                    m_axis_tvalid <= 1'b0;
                    m_axis_tlast  <= 1'b0;
                    word_cnt      <= 'd0;
                    if (trigger_edge) begin
                        // Present word 0 immediately; it stays on the bus
                        // until the slave accepts it.
                        m_axis_tvalid <= 1'b1;
                        m_axis_tdata  <= {DATA_WIDTH{1'b0}};
                        m_axis_tlast  <= (PACKET_WORDS == 1);
                        state         <= ST_SEND;
                    end
                end

                ST_SEND: begin
                    // Hold TVALID/TDATA/TLAST stable until the beat is taken.
                    if (beat_accepted) begin
                        if (m_axis_tlast) begin
                            // Final word was just accepted: end the packet.
                            m_axis_tvalid <= 1'b0;
                            m_axis_tlast  <= 1'b0;
                            state         <= ST_DONE;
                        end else begin
                            word_cnt     <= next_cnt;
                            m_axis_tdata <= next_cnt;
                            m_axis_tlast <= (next_cnt == PACKET_WORDS-1);
                        end
                    end
                end

                ST_DONE: begin
                    // One idle cycle between packets.
                    m_axis_tvalid <= 1'b0;
                    m_axis_tlast  <= 1'b0;
                    state         <= ST_IDLE;
                end

                default: state <= ST_IDLE;
            endcase
        end
    end

endmodule
Platform Configuration
Integrate the core with AXI DMA configured for S2MM (Stream-to-Memory-Mapped) only. Connect the DMA interrupt output to the PS interrupt controller. Route the AXI GPIO output to the core's trigger input. Enable the High-Performance (HP) AXI port for DDR access and the General-Purpose (GP) port for control register access.
Software Implementation
The application configures the DMA receiver, asserts the GPIO trigger after a settling delay, and processes completion interrupts.
#include "xparameters.h"
#include "xaxidma.h"
#include "xgpio.h"
#include "xscugic.h"
#include "xil_cache.h"
#include "xil_exception.h"
#include "xil_printf.h"
#include "sleep.h"
/* Device and interrupt identifiers, aliased from XPAR_* constants.
 * NOTE(review): these are presumably supplied by the generated BSP
 * parameters header - confirm it is included. */
#define DMA_DEV_ID XPAR_AXIDMA_0_DEVICE_ID
#define GPIO_DEV_ID XPAR_GPIO_0_DEVICE_ID
#define INTC_DEV_ID XPAR_SCUGIC_SINGLE_DEVICE_ID
#define RX_INTR_ID XPAR_FABRIC_AXIDMA_0_VEC_ID
/* Working region: 0x01000000 bytes above the PS DDR base address. */
#define DDR_BASE (XPAR_PS7_DDR_0_S_AXI_BASEADDR + 0x01000000)
/* Receive buffer: 0x00300000 bytes above DDR_BASE. */
#define RX_BUFFER_ADDR (DDR_BASE + 0x00300000)
/* Transfer size in 32-bit words; equals the default PACKET_WORDS (512) of
 * the axis_counter_core RTL module. */
#define TRANSFER_WORDS 512
/* Transfer size in bytes, as passed to XAxiDma_SimpleTransfer(). */
#define TRANSFER_BYTES (TRANSFER_WORDS * sizeof(u32))
/* Driver instances shared between main() and the helper functions. */
static XAxiDma dma_inst;
static XGpio gpio_inst;
static XScuGic intc_inst;
/* Flags written by rx_isr() and polled by main(): rx_complete is set on an
 * IOC interrupt, dma_error on an error interrupt. */
volatile int rx_complete = 0;
volatile int dma_error = 0;
/* Forward declarations for the functions defined after main(). */
static void setup_interrupts(void);
static void rx_isr(void *callref);
void trigger_transfer(void);
/**
 * Application entry point.
 *
 * Initializes the GPIO and AXI DMA drivers, hooks up the S2MM interrupt,
 * arms a single receive transfer of TRANSFER_BYTES into RX_BUFFER_ADDR,
 * pulses the GPIO trigger, and then busy-waits until rx_isr() reports
 * completion or an error.
 *
 * @return XST_SUCCESS when the transfer completed, XST_FAILURE if any
 *         driver initialization step, the transfer submission, or the
 *         transfer itself failed.
 */
int main(void)
{
	u32 *rx_buffer = (u32 *)RX_BUFFER_ADDR;
	int status;

	xil_printf("Initializing DMA and GPIO...\r\n");

	status = XGpio_Initialize(&gpio_inst, GPIO_DEV_ID);
	if (status != XST_SUCCESS) {
		xil_printf("GPIO initialization failed\r\n");
		return XST_FAILURE;
	}
	/* Direction mask 0x0: every bit of channel 1 is an output. */
	XGpio_SetDataDirection(&gpio_inst, 1, 0x0);

	XAxiDma_Config *cfg = XAxiDma_LookupConfig(DMA_DEV_ID);
	if (!cfg) {
		xil_printf("DMA configuration not found\r\n");
		return XST_FAILURE;
	}
	status = XAxiDma_CfgInitialize(&dma_inst, cfg);
	if (status != XST_SUCCESS) {
		xil_printf("DMA initialization failed\r\n");
		return XST_FAILURE;
	}

	/* The handler must be registered before the transfer is started. */
	setup_interrupts();
	XAxiDma_IntrDisable(&dma_inst, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);
	XAxiDma_IntrEnable(&dma_inst, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);

	rx_complete = 0;
	dma_error = 0;

	/* Push any dirty cache lines covering the buffer out to memory before
	 * the DMA starts writing there, so a later eviction cannot overwrite
	 * freshly received data. */
	Xil_DCacheFlushRange((UINTPTR)rx_buffer, TRANSFER_BYTES);

	/* Length is in bytes, not words. */
	status = XAxiDma_SimpleTransfer(&dma_inst, (UINTPTR)rx_buffer,
					TRANSFER_BYTES, XAXIDMA_DEVICE_TO_DMA);
	if (status != XST_SUCCESS) {
		xil_printf("DMA transfer submission failed\r\n");
		return XST_FAILURE;
	}

	/* Give the receive channel time to become ready before the PL core
	 * is told to start streaming. */
	usleep(1000);
	trigger_transfer();

	/* Both flags are volatile and are set from rx_isr(). */
	while (!rx_complete && !dma_error)
		;

	if (dma_error) {
		xil_printf("Transfer failed\r\n");
		return XST_FAILURE;
	}

	/* The DMA wrote to memory behind the cache: discard any cached copy of
	 * the buffer so the CPU reads what the DMA actually stored. */
	Xil_DCacheInvalidateRange((UINTPTR)rx_buffer, TRANSFER_BYTES);

	xil_printf("Received %d words starting with 0x%08X\r\n",
		   TRANSFER_WORDS, rx_buffer[0]);
	return XST_SUCCESS;
}
void trigger_transfer(void)
{
XGpio_DiscreteWrite(&gpio_inst, 1, 0x1);
XGpio_DiscreteClear(&gpio_inst, 1, 0x1);
}
/**
 * Interrupt handler for the DMA receive (device-to-DMA) channel.
 *
 * Reads and acknowledges the pending interrupt bits, then either flags an
 * error (and resets the DMA engine) or flags completion.
 *
 * @param callref Pointer to the XAxiDma instance registered with the
 *                interrupt controller.
 */
static void rx_isr(void *callref)
{
	/* Upper bound on reset-done polls so the ISR cannot hang forever if
	 * the engine never comes out of reset. */
	enum { RESET_POLL_LIMIT = 10000 };

	XAxiDma *inst = (XAxiDma *)callref;
	u32 irq_status = XAxiDma_IntrGetIrq(inst, XAXIDMA_DEVICE_TO_DMA);

	/* Acknowledge exactly the bits that were read. */
	XAxiDma_IntrAckIrq(inst, irq_status, XAXIDMA_DEVICE_TO_DMA);

	/* Nothing pending for this channel. */
	if (!(irq_status & XAXIDMA_IRQ_ALL_MASK))
		return;

	if (irq_status & XAXIDMA_IRQ_ERROR_MASK) {
		int polls_left = RESET_POLL_LIMIT;

		/* Publish the error first so the waiting loop in main() is
		 * released even if the reset below does not complete. */
		dma_error = 1;
		XAxiDma_Reset(inst);
		while (polls_left-- > 0 && !XAxiDma_ResetIsDone(inst))
			;
		return;
	}

	if (irq_status & XAXIDMA_IRQ_IOC_MASK)
		rx_complete = 1;
}
static void setup_interrupts(void)
{
XScuGic_Config *cfg = XScuGic_LookupConfig(INTC_DEV_ID);
XScuGic_CfgInitialize(&intc_inst, cfg, cfg->CpuBaseAddress);
XScuGic_SetPriorityTriggerType(&intc_inst, RX_INTR_ID, 0xA0, 0x3);
XScuGic_Connect(&intc_inst, RX_INTR_ID, (Xil_InterruptHandler)rx_isr, &dma_inst);
XScuGic_Enable(&intc_inst, RX_INTR_ID);
Xil_ExceptionInit();
Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_INT,
(Xil_ExceptionHandler)XScuGic_InterruptHandler, &intc_inst);
Xil_ExceptionEnable();
}
Troubleshooting
Missing Completion Interrupts
If the stream signals appear correct but no interrupt fires, verify the DMA interrupt output connects to the PS IRQ input in the hardware design. Confirm the transfer length argument to XAxiDma_SimpleTransfer specifies bytes, not words. For 512 32-bit words, pass 2048. Ensure interrupt handlers are registered before starting the transfer, and the interrupt controller is enabled.
Optional Stream Signals
The TKEEP and TSTRB signals may be omitted or tied high when using the standard AXI DMA core, as it assumes byte-aligned transfers by default.
Timing Violations
If TREADY deasserts shortly after the transfer begins, causing data loss, the PL core likely started before the DMA engine initialized. Insert a millisecond-scale delay between XAxiDma_SimpleTransfer and the GPIO trigger assertion to allow the DMA S2MM channel to finish arming (simple-transfer mode programs the channel registers directly; no descriptors are involved).