Solving Text Truncation at Page Breaks When Converting Frontend Pages to PDF
Summary
In practice, exporting rich text content to PDF often results in varying degrees of text truncation at page breaks due to font size, formatting, and page size issues.
Main Challenge
Images and text being split across pages, i.e., page break handling.
Technical Details
Simple PDF export from the frontend can be achieved using html2canvas and jsPDF; this article focuses on handling truncation at page breaks. Assume the fixed PDF page height is a, the height of each block element to be inserted is b, the distance of the element from the top is c, and the current page number is p. When b + c > a * p, the element extends past the bottom of page p, which marks a potential truncation point. The element at risk is called the truncation-prone element. Two solutions exist:
Solution 1: Insert an empty element of height a * p - c before the truncation-prone element to push it to the next page. This may leave large whitespace if the element is pure text, but works well for images.
Solution 2: Split the truncation-prone element into two parts and insert whitespace at the break. For example, if the line height is 36px, calculate the remaining space a * p - c, then split it into an integer multiple of the line height (e.g., Math.floor((a*p-c)/36)*36) and the remainder. Then insert a duplicate element with marginTop set to the negative of that integer multiple to shift it up. This approach leaves whitespace smaller than one line height, making it more acceptable.
Code Implementation
Solution 2 is an optimization of Solution 1. Below is the core code for Solution 2.
/**
 * Decides whether a blank div has to be inserted after `nodes[index]`.
 *
 * The answer is `true` only when the node at `index` ends strictly above
 * `pageHeight` while the node that follows it in `nodes` ends strictly below
 * it, i.e. the page boundary falls somewhere in the following node.
 *
 * Both bottoms are measured as `offsetTop + offsetHeight` plus the combined
 * `offsetTop` of the elements with ids `itemsss` and `itemss`.
 *
 * @param {ArrayLike<HTMLElement>} nodes - Candidate elements in document order.
 * @param {number} index - Position in `nodes` of the element being examined.
 * @param {number} pageHeight - Vertical position of the page boundary to test.
 * @returns {boolean} Whether the boundary separates the node from its successor.
 */
function isSplit(nodes, index, pageHeight) {
  const outerTop = document.getElementById('itemsss').offsetTop;
  const baseTop = document.getElementById('itemss').offsetTop + outerTop;
  const node = nodes[index];
  const follower = nodes[index + 1];
  const bottomOf = (el) => el.offsetTop + baseTop + el.offsetHeight;

  // Written as a negated `<` so a NaN bottom is rejected as well.
  if (!(bottomOf(node) < pageHeight)) {
    return false;
  }
  // The last node has no successor that could be cut.
  if (!follower) {
    return false;
  }
  return bottomOf(follower) > pageHeight;
}
/**
 * Pads the paragraphs inside `#pdfDom` so that text is not cut in half where
 * the rendered content will later be sliced into PDF pages.
 *
 * For every `<p>` for which `isSplit` reports a page boundary in the following
 * content, a transparent `div.emptyDiv` is inserted after the paragraph. Its
 * height is the space left on the current page. When that space holds at least
 * one full line, the leading whole lines of the next element are duplicated
 * into the div and the next element is shifted up by the same amount (negative
 * `margin-top` inside an `overflow:hidden` wrapper), so only less than one line
 * of whitespace remains at the page break.
 *
 * Takes no parameters (extra arguments from callers are ignored) and mutates
 * the DOM in place.
 *
 * @returns {Promise<void>} Resolves once all paragraphs have been processed.
 */
async function outPutPdfFn() {
  const pageHeight = 1137; // height of one page in CSS pixels
  const lineHeight = 36; // line height in px used to split the remaining space
  const perPageShift = 21; // empirical adjustment applied once per preceding page

  const pdfDom = document.getElementById('pdfDom');
  // getElementsByTagName returns a live collection, so the <p> elements
  // created below are themselves visited by later iterations.
  const paragraphs = pdfDom.getElementsByTagName('p');

  for (let i = 0; i < paragraphs.length; i++) {
    const paragraph = paragraphs[i];
    const y1 = document.getElementById('item').offsetTop;
    const y2 = document.getElementById('itemss').offsetTop + y1;
    // 1-based number of the page on which this paragraph ends.
    const multiple = Math.ceil((paragraph.offsetTop + y2 + paragraph.offsetHeight) / pageHeight);
    const y = y2 - perPageShift * (multiple - 1); // Empirical adjustment

    if (!isSplit(paragraphs, i, multiple * pageHeight)) {
      continue;
    }

    const parent = paragraph.parentNode;
    const blankDiv = document.createElement('div');
    blankDiv.className = 'emptyDiv';
    blankDiv.style.background = 'transparent';
    blankDiv.style.overflow = 'hidden';

    // Space between the bottom of this paragraph and the end of its page,
    // and the part of that space that holds whole lines of text.
    const remaining = multiple * pageHeight - (paragraph.offsetTop + y + paragraph.offsetHeight);
    const wholeLines = Math.floor(remaining / lineHeight) * lineHeight;
    blankDiv.style.height = remaining + 'px';
    blankDiv.style.width = '100%';

    // Use the next *element*: `nextSibling` can be a whitespace Text node,
    // which has no computed style, offsetHeight, style or innerHTML.
    const next = paragraph.nextElementSibling;
    if (!next) {
      parent.appendChild(blankDiv);
      continue;
    }

    if (wholeLines > 0) {
      const indent = getComputedStyle(next).getPropertyValue('text-indent');
      // Show the first whole lines of `next` at the bottom of this page ...
      blankDiv.innerHTML = `<p style="height: ${wholeLines}px; overflow:hidden; margin-top:0; text-indent: ${indent};">${next.innerHTML}</p>`;
      // ... and hide exactly those lines at the top of the following page.
      next.style = `overflow:hidden; height: ${next.offsetHeight - wholeLines}px;`;
      next.innerHTML = `<p style="margin-top: -${wholeLines}px; text-indent: ${indent};">${next.innerHTML}</p>`;
    }
    parent.insertBefore(blankDiv, next);
  }
}
/**
 * Renders `ref` to a multi-page A4 PDF and saves it as `test.pdf`.
 *
 * Steps: pad the content with `outPutPdfFn`, rasterise `ref` with html2canvas,
 * cut the resulting canvas into page-sized slices, and place each slice on its
 * own PDF page together with the footer (and, from page 2 onwards, the header).
 *
 * The returned promise settles only after the PDF has been saved or an error
 * has occurred, and `loading.exportLoading` is reset in both cases.
 *
 * @param {HTMLElement} ref - Element whose rendering becomes the PDF body.
 * @param {HTMLElement} footer - Footer element; must contain `.pdf-footer-page`
 *   and `.pdf-footer-page-count`, which are overwritten for every page.
 * @param {HTMLElement} header - Header element drawn on every page after the first.
 * @param {{exportLoading: boolean}} loading - State object whose
 *   `exportLoading` flag is set to `false` when the export finishes.
 * @returns {Promise<void>} Resolves after `pdf.save` has been called; rejects
 *   with the underlying error if padding, rendering or PDF generation fails.
 */
async function getPdf(ref, footer, header, loading) {
  // Size of the body image on the page, in the PDF's unit (mm).
  const a4w = 160;
  const a4h = 247;

  try {
    await outPutPdfFn(ref);
    const canvas = await html2canvas(ref, {
      allowTaint: false,
      taintTest: false,
      logging: false,
      useCORS: true,
      dpi: 4,
      scale: 4,
    });

    const pdf = new JsPDF('p', 'mm', 'a4');
    const ctx = canvas.getContext('2d');
    // Canvas pixels that correspond to one page worth of body height.
    const imgHeight = Math.floor((a4h * canvas.width) / a4w);
    const pages = Math.ceil(canvas.height / imgHeight);

    let headerImage = null; // rendered lazily, then reused for every later page
    let renderedHeight = 0;
    let i = 0;

    while (renderedHeight < canvas.height) {
      i++;

      // Copy the next slice of the full canvas (the last one may be shorter).
      const sliceHeight = Math.min(imgHeight, canvas.height - renderedHeight);
      const pageCanvas = document.createElement('canvas');
      pageCanvas.width = canvas.width;
      pageCanvas.height = sliceHeight;
      pageCanvas
        .getContext('2d')
        .putImageData(ctx.getImageData(0, renderedHeight, canvas.width, sliceHeight), 0, 0);
      pdf.addImage(
        pageCanvas.toDataURL('image/jpeg', 1.0),
        'JPEG',
        25,
        25,
        a4w,
        Math.min(a4h, (a4w * pageCanvas.height) / pageCanvas.width)
      );

      if (i > 1) {
        // The header is not modified here, so one render serves all pages.
        if (headerImage === null) {
          const canvHeader = await html2canvas(header, {
            dpi: 0.5,
            useCORS: true,
          });
          headerImage = canvHeader.toDataURL('image/jpeg', 1.0);
        }
        pdf.addImage(headerImage, 'JPEG', 25, 24.9, a4w, 0.1);
      }

      // The footer shows "current / total", so it is re-rendered per page.
      footer.querySelector('.pdf-footer-page').innerText = i;
      footer.querySelector('.pdf-footer-page-count').innerText = pages;
      const canvFooter = await html2canvas(footer, {
        dpi: 4,
        scale: 4,
        useCORS: true,
        allowTaint: false,
        taintTest: false,
        logging: false,
      });
      renderedHeight += imgHeight;
      pdf.addImage(canvFooter.toDataURL('image/jpeg', 1.0), 'JPEG', 25, 272, a4w, 9);

      if (renderedHeight < canvas.height) {
        pdf.addPage();
      }
    }

    pdf.save('test.pdf');
  } finally {
    // Always clear the spinner, including when rendering failed.
    if (loading) {
      loading.exportLoading = false;
    }
  }
}