我創建了一個應用程式,通過從多個 Excel 作業簿中提取資料來處理學生的結果。問題是使用 Puppeteer 生成 PDF 檔案會使系統陷入回圈,直到它掛起系統。
實際上,我已經使用捆綁為 pdf-creator-node 的 PhantomJs 測驗了以下相同的代碼,并且能夠在 3 分鐘內輕松生成 150 個 PDF 檔案。我拋棄 PhantomJs 的唯一挑戰是 CSS 檔案中的所有樣式都沒有包括在內,即使我將它作為行內樣式插入到標題中,起訴 JS 的替換功能。另一個是 PhantomJs 不再處于積極開發中。我在網上搜索,發現只有 Puppeteer 是有效的解決方案,并且具有積極的開發和支持。
我嘗試在回圈中的 pdfCreator() 末尾使用 page.close(),在 pdfGenerator() 末尾使用 browser.close()。我做錯了什么?
下面是 server.js 和 PdfGenerator.js 檔案中的代碼,帶有錯誤示例,以及系統爬出掛起狀態后我的任務管理器的螢屏截圖。對于 HTML 生成,我使用 Mustache。我排除了 server.js 中的一些代碼行,因為總字符數超過 60k。
服務器.js
// [codes were removed here]
if(getCode == 'compute-result') {
// declare variable
let setData = null;
let setTitle = 'Results Computation...';
let setArgs = getArgs;
// dataFromFile = ReadFile(pathCodeTextFile);
// setArgs = Number(dataFromFile);
setCode = 'compute-result';
let setView = [];
let setNext = true;
let countTerms = [];
// if(getArg > 0) {
// Final Result computation
const getJson = ReadFile(pathJsonResults);
// const getCtrl = ReadFile(pathJsonCtrl);
const getResultObject = JSON.parse(getJson);
getResult = getResultObject;
const totalResults = getResult.firstTerm.length getResult.secondTerm.length getResult.thirdTerm.length;
if(setView.length < 1 && getResult != null) {
setData = 'PDFs for Students Results initiating...';
setView.unshift('Reading saved data...');
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: null, view: JSON.stringify(setView)});
}
Sleep(2000).then(() => {
if(getResult != null) {
setData = 'Students Results will be ready in a moment';
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: setArgs, view: JSON.stringify(setView)});
}
const wacthFiles = (file, className, termName, sessionName, completed, pdfList) => {
try {
if(typeof file == 'string' && !FileExists(pathJsonPdfList)) {
if(pdfList.length < 2){
setData = 'Saving PDFs to downladable files...';
}
if(className != null && termName != null && sessionName != null) {
setTitle = `${pdfList.length} Result PDF${pdfList.length > 1?'s':''}...`;
setView.unshift(file);
if(!countTerms.includes(termName)) {
countTerms.push(termName)
}
// setCode = -1000 - pdfList.length;
// console.log('PDF PROGRESS: ', `${pdfList.length} Result PDF${pdfList.length > 1?'s':''}... ${setCode}`);
// when all PDFs are created
if(completed) {
setTitle = setTitle.replace('...', ' [completed]');
setData = 'Result Download button is Active. You may click it now.';
setView.unshift('=== PDF GENERATION COMPLETED ===');
setView.unshift(`A total of ${pdfList.length} students' Results were generated`);
WriteFile(pathJsonPdfList, JSON.stringify(pdfList));
// set donwload button active
setCode = Number(codeTextFilePdfCompleted);
setNext = false;
getResult = null;
let termString = countTerms.toString();
termString = ReplaceAll(termString, '-term', '');
termString = ReplaceAll(termString, ',', '-');
const addTxt = `${className} _${termString} Term${countTerms.length>1?'s':''} (${sessionName})`;
WriteFile(pathCodeTextFile, addTxt);
// console.log('======== PDF GENERATION ENDS ================');
} else {
setCode = -1 * pdfList.length;
}
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: setArgs, view: JSON.stringify(setView)});
}
}
} catch (error) {
console.log('ERROR ON WATCHER: ', error);
}
}
if(!FileExists(pathJsonPdfList) && getResult !== null) {
PdfGenerator(getResult, wacthFiles);
}
// Watcher(pathWatchResults, setCode, wacthDir, 10000);
});
// }
}
}
} catch (error) {
})
client.on('disconnect', () => {
console.log('SERVER: Disconnected');
});
server.listen(portApi, () =>{
console.log('Server listens on port 8881')
});
// serve static files
app.use(express.static(pathPublic));
// [codes were removed here]
PdfGenerator.js 問題在于這些函式:PdfGenerator & createPdf
'use strict';
process.setMaxListeners(Infinity) // fix for Puppeteer MaxListenerExceededWarning
const Puppeteer = require('puppeteer')
const {HtmlGenerator} = require('../components/HtmlGenerator')
const {WriteFile, FileExists, RandomNumber, RoundNumber, IsNumberFraction, ReadFile} = require('../components/Functions')
if (process.env.NODE_ENV !== 'production') {
require('dotenv').config();
}
const pathFirstTermResults = process.env.DIR_FIRST_TERM_RESULTS;
const pathSecondTermResults = process.env.DIR_SECOND_TERM_RESULTS;
const pathThirdTermResults = process.env.DIR_THIRD_TERM_RESULTS;
const publicDir = process.env.DIR_PUBLIC;
const cssFile = process.env.PATH_CSS_FILENAME;
const pathCssRaw = __dirname '\\' publicDir '\\' cssFile;
const pathCss = pathCssRaw.replace(`\\uploads`, '');
const tagCssReplace = process.env.TAG_CSS_REPLACE;
let jsonDir = process.env.PATH_JSON;
jsonDir = jsonDir.split('/').pop();
let htmlDir = process.env.DIR_HTML;
htmlDir = __dirname '\\' htmlDir.split('/').pop();
const htmlType1 = htmlDir '\\' process.env.HTML_TYPE1;
const htmlType2 = htmlDir '\\' process.env.HTML_TYPE2;
const htmlType3 = htmlDir '\\' process.env.HTML_TYPE3;
const pathJsonPdfList = './' jsonDir '/' process.env.JSON_PDF_LIST_FILENAME;
const pathJsonPdfContent = __dirname '\\' jsonDir '\\' process.env.JSON_PDF_CONTENT;
const firstTermDir = 'first-term';
const secondTermDir = 'second-term';
const thirdTermDir = 'third-term';
let cumulativeFirstTermTotalList = {};
let cumulativeSecondTermTotalList = {};
let firstTermOnce = true;
let secondTermOnce = true;
let thirdTermOnce = true;
let isActive = false;
const getPath = (p, f) => {
let dir = pathFirstTermResults;
switch (p) {
case firstTermDir:
dir = pathFirstTermResults;
break;
case secondTermDir:
dir = pathSecondTermResults;
break;
case thirdTermDir:
dir = pathThirdTermResults;
break;
default:
break;
}
return dir f
}
const resolution = {
x: 1920,
y: 1080
}
const args = [
'--disable-gpu',
`--window-size=${resolution.x},${resolution.y}`,
'--no-sandbox',
]
const createPdf = (page, content, templateType, filename, className, term, sessionName, isProcessActive, pdfFileList, cb) => {
let path, document, options;
path = getPath(term, filename);
if(path != null) {
let options = {
path: path,
format: 'A4',
printBackground: true,
margin: {
left: '0px',
top: '0px',
right: '0px',
bottom: '0px'
}
}
let templateData = '';
switch (templateType) {
case '1':
templateData = ReadFile(htmlType1);
break;
case '2':
templateData = ReadFile(htmlType2);
break;
case '3':
templateData = ReadFile(htmlType3);
break;
default:
templateData = ReadFile(htmlType1);
break;
}
(async() => {
const html = HtmlGenerator(content, templateData);
if(html != undefined && html !== '' && html != null) {
// create PDF file
cb(filename, className, term, sessionName, isProcessActive, pdfFileList);
// get style from .css & replace
const css = ReadFile(pathCss);
await page.setContent(html, { waitUntil: 'networkidle0'});
await page.addStyleTag(css);
await page.pdf(options);
page.close();
}
})()
}
}
const pdfGenerator = (json, cb) => {
let data = {};
let pdfFileList = [];
if(typeof json == 'string') {
data = JSON.parse(json)
} else {
data = json;
}
try {
// declare defaults
let filename = 'Student' '.pdf';
let termName = firstTermDir;
const templateType = data.keys.templateType;
const session = data.classInfo.Session;
const sessionName = session.replace('/', '-');
const students = data.students;
const className = data.classInfo.Class_Name;
const recordFirstTerm = data.firstTerm;
const recordSecondTerm = data.secondTerm;
const recordThirdTerm = data.thirdTerm;
let pdfCreatedList = [];
let isReset = false;
let totalResultsExpected = Object.keys(recordFirstTerm).length Object.keys(recordSecondTerm).length Object.keys(recordThirdTerm).length;
let totalResultsCount = 0;
let jsonForPdf = {};
let record = {};
let sRecord, path, id, fName, lName;
// get each student
let logEndOnce = true;
let logBeforeOnce = true;
logBeforeOnce && console.log('============== *** ================');
logBeforeOnce && console.log('======== PDF GENERATION BEGINS ================');
const computeResult = (page, setTerm, setRecord, setReset) => {
const termName = setTerm;
const record = setRecord;
let isReset = setReset;
logBeforeOnce && console.log(`====== ${termName} RESULTS BEGINS ======`);
for(let elem of students){
id = elem.id;
fName = elem.firstName;
lName = elem.lastName;
filename = `${lName} ${fName} _${termName} ${sessionName}.pdf`;
// sRecord = record.filter(function (entry) { return entry[id] !== undefined; });
sRecord = record[id];
path = getPath(termName, filename);
// create pdf
if(!FileExists(path) && !FileExists(pathJsonPdfList)){
// generate final JSON for the student
// isReset = (pdfCreatedList.includes(id))? false: true;
jsonForPdf = finalJson(elem, sRecord, data, termName);
(pdfFileList.length < 1) && WriteFile(pathJsonPdfContent, JSON.stringify(jsonForPdf));
pdfFileList.push({
'term': termName,
'file': filename
});
totalResultsCount = pdfFileList.length;
const pdfDate = new Date();
console.log(`${filename} (${totalResultsCount}/${totalResultsExpected}) at ${pdfDate.getHours()}hr${pdfDate.getHours()>1?'s':''} - ${pdfDate.getMinutes()}min${pdfDate.getMinutes()>1?'s':''} - ${pdfDate.getSeconds()}sec${pdfDate.getSeconds()>1?'s':''}`);
isActive = (totalResultsExpected === totalResultsCount)? true: false;
logEndOnce = false;
// cb(filename, className, termName, sessionName, isActive, pdfFileList);
// WriteFile(path, null);
isReset = true;
createPdf(page, jsonForPdf, templateType, filename, className, termName, sessionName, isActive, pdfFileList, cb);
}
}
logBeforeOnce && console.log(`====== ${termName} RESULTS ENDS ======`);
}
// get each student result for First Term
const computeFirstTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.firstTerm === '1') {
termName = firstTermDir;
record = recordFirstTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
// get each student result for Second Term
const computeSecondTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.secondTerm === '1') {
termName = secondTermDir;
record = recordSecondTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
// get each student result for Third Term
const computeThirdTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.thirdTerm === '1') {
termName = thirdTermDir;
record = recordThirdTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
(async () => {
browser = await Puppeteer.launch({
headless: true,
handleSIGINT: false,
args: args,
});
const page = await browser.newPage();
await page.setViewport({
width: resolution.x,
height: resolution.y,
})
await computeFirstTerm(page);
await computeSecondTerm(page);
await computeThirdTerm(page);
browser.close()
})()
if(totalResultsExpected === totalResultsCount && totalResultsCount !== 0 && !logEndOnce) {
logEndOnce = true;
logBeforeOnce = false;
console.log('======== PDF GENERATION ENDS ================');
}
} catch (error) {
console.log('==== ERROR IN PDF GENERATION: ', error)
}
}
module.exports = {
PdfGenerator: pdfGenerator
}
錯誤
info Visit https://yarnpkg.com/en/docs/cli/run for documentation about this command.
lerna ERR! yarn run start stderr:
<--- Last few GCs --->
[9884:000002D68A73C6B0] 1665171 ms: Scavenge 44.1 (45.8) -> 43.2 (45.8) MB, 223.9 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
[9884:000002D68A73C6B0] 1684089 ms: Scavenge 44.1 (45.8) -> 43.3 (45.8) MB, 587.3 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
[9884:000002D68A73C6B0] 1749901 ms: Scavenge 44.2 (45.8) -> 43.3 (45.8) MB, 5099.0 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
<--- JS stacktrace --->
FATAL ERROR: Committing semi space failed. Allocation failed - JavaScript heap out of memory
1: 00007FF6ED61013F
2: 00007FF6ED59F396
3: 00007FF6ED5A024D
4: 00007FF6EDED19EE
5: 00007FF6EDEBBECD
6: 00007FF6EDD5F61C
7: 00007FF6EDD6933F
8: 00007FF6EDD5BF19
9: 00007FF6EDD5A0D0
10: 00007FF6EDD7EA06
11: 00007FF6EDAB1CD5
12: 00007FF6EDF5F3E1
13: 00007FF6EDF602E9
14: 000002D68C4EF69E
error Command failed with exit code 134.
任務管理器的螢屏截圖,Chromium 運行超過 50 個的多個實體。

我很感激任何幫助。我希望這可以解決給我一個流暢的 PDF 生成。謝謝你。
uj5u.com熱心網友回復:
示例解決方案(限制并行瀏覽器)
我為您創建了一個PdfPrinter類,您可以將其集成到您的設定中。它允許您限制并行 pdf 生成作業的數量,并允許設定限制并為您管理打開/關閉瀏覽器。該類PdfPrinter也是高度耦合的,需要進行一些修改才能將其用作通用佇列。從邏輯上講,這可以修改為一般佇列。
您可以嘗試將其集成到您的代碼中。這是一個帶有簡化 pdf 的完整作業測驗示例(沒有從 excel 獲取實際資料的部分..)
據我了解您的代碼,您不需要傳遞page所有函式。首先創建您的html css然后使用pdfPrinter并讓它處理page創建 瀏覽器啟動..
(我喜歡撰寫這樣的代碼,所以我直接開始了..)
var puppeteer = require('puppeteer')
const defaultPrinterOptions = {
format: 'A4',
printBackground: true,
margin: {
left: '0px',
top: '0px',
right: '0px',
bottom: '0px'
}
}
class PdfPrinter {
maxBrowsers = 2
enqueuedPrintJobs = []
failedJobs = []
browserInstances = 0
// max browser instances in parallel
constructor(maxBrowsers) {
this.maxBrowsers = maxBrowsers
}
/**
*
* @param {*} html the html content to print
* @param {*} css to apply to the page
* @param {*} printOptions options passed to puppeteer
*/
// enqueues a print but the exact end moment cannot be known..
enqueuePrint = (html, css, path, done) => {
// merge custom options with defaultOptions..
const printOptions = {
...defaultPrinterOptions,
// add the path to the options.
path: path
}
// create a function which can be stored in an array
// it will later be grabbed by startPrinter() OR at the time any
// brwoser freed up..
// the function needs to be passed the actual used browser instance!
this.enqueuedPrintJobs.push(async(browser) => {
// catch the error which may be produced when printing something..
try {
// print the document
await this.print(browser, html, css, printOptions)
} catch (err) {
console.error('error when printing document..CLosing browser and starting a new job!!', printOptions.path)
console.error(err)
// store someting so you now what failed and coudl be retried or something..
this.failedJobs.push({ html, css, path: printOptions.path })
// puppeteer can run into erros too!!
// so close the browser and launch a new one!
await this.closeBrowser(browser)
browser = await this.launchBrowser()
}
// after the print, call done() so the promise is resovled in the right moment when
// this particular print has ended.!
done()
// start the next job right now if there are any left.
const job = this.enqueuedPrintJobs.shift()
if (!job) {
console.log('No print jobs available anymore. CLosing this browser instance.. Remaining browsers now:', this.maxBrowsers - this.browserInstances 1)
await this.closeBrowser(browser)
return
}
// job is actually this function itself! It will be executed
// and automatically grab a new job after completion :)
// we pass the same browser instance to the next job!.
await job(browser)
})
// whenever a print job added make sure to start the printer
// this starts new browser instances if the limit is not exceeded resp. if no browser is instantiated yet,
// and does nothing if maximum browser count is reached..
this.tryStartPrinter()
}
// same as enqueuePrint except it wraps it in a promise so we can now the
// exact end moment and await it..
enqueuePrintPromise(html, css, path) {
return new Promise((resolve, reject) => {
try {
this.enqueuePrint(html, css, path, resolve)
} catch (err) {
console.error('unexpected error when setting up print job..', err)
reject(err)
}
})
}
// If browser instance limit is not reached will isntantiate a new one and run a print job with it.
// a print job will automatically grab a next job with the created browser if there are any left.
tryStartPrinter = async() => {
// Max browser count in use OR no jobs left.
if (this.browserInstances >= this.maxBrowsers || this.enqueuedPrintJobs.length === 0) {
return
}
// browser instances available!
// create a new one
console.log('launching new browser. Available after launch:', this.maxBrowsers - this.browserInstances - 1)
const browser = await this.launchBrowser()
// run job
const job = this.enqueuedPrintJobs.shift()
await job(browser)
}
closeBrowser = async(browser) => {
// decrement browsers in use!
// important to call before closing browser!!
this.browserInstances--
await browser.close()
}
launchBrowser = async() => {
// increment browsers in use!
// important to increase before actualy launching (async stuff..)
this.browserInstances
// this code you have to adjust according your enviromnemt..
const browser = await puppeteer.launch({ headless: true })
return browser
}
// The actual print function which creates a pdf.
print = async(browser, html, css, printOptions) => {
console.log('Converting page to pdf. path:', printOptions.path)
// Run pdf creation in seperate page.
const page = await browser.newPage()
await page.setContent(html, { waitUntil: 'networkidle0' });
await page.addStyleTag({ content: css });
await page.pdf(printOptions);
await page.close();
}
}
// testing the PDFPrinter with some jobs.
// make sure to run the printer in an `async` function so u can
// use await...
const testPrinterQueue = async() => {
// config
const maxOpenedBrowsers = 5 // amount of browser instances which are allowed to be opened in parallel
const testJobCount = 100 // amount of test pdf jobs to be created
const destDir = 'C:\\somepath' // the directory to store the pdfs in..
// create sample jobs for testing...
const jobs = []
for (let i = 0; i < testJobCount; i ) {
jobs.push({
html: `<h1>job number [${i}]</h1>`,
css: 'h1 { background-color: red; }',
path: require('path').join(destDir, `pdf_${i}.pdf`)
})
}
// track time
const label = 'printed a total of ' testJobCount ' pdfs!'
console.time(label)
// run the actual pdf generation..
const printer = new PdfPrinter(maxOpenedBrowsers)
const jobProms = []
for (let job of jobs) {
// run jobs in parallel. Each job wil be runned async and return a Promise therefor
jobProms.push(
printer.enqueuePrintPromise(job.html, job.css, job.path)
)
}
console.log('All jobs enqueued!! Wating for finish now.')
// helper function which awaits all the print jobs, resp. an array of promises.
await Promise.all(jobProms)
console.timeEnd(label)
// failed jobs::
console.log('jobs failed:', printer.failedJobs)
// as file:
await require('fs').promises.writeFile('failed-jobs.json', JSON.stringify(printer.failedJobs))
}
testPrinterQueue().then(() => {
console.log('done with everyting..')
}).catch(err => {
console.error('unexpected error occured while printing all pages...', err)
})
您只需要在開始時調整destDir/openedBrowsers和testJobCountvarstestPrinterQueue()即可使其正常作業。
是什么導致您的代碼出現問題
讓我們來看看這件作品
(async () => {
browser = await Puppeteer.launch({
headless: true,
handleSIGINT: false,
args: args,
});
const page = await browser.newPage();
await page.setViewport({
width: resolution.x,
height: resolution.y,
})
await computeFirstTerm(page);
await computeSecondTerm(page);
await computeThirdTerm(page);
browser.close()
})()
您創建了一個立即執行的匿名函式。在函式內,所有陳述句都使用正確等待await。但是,如果您在應用程式的同步部分中運行這整個部分,則整個函式將立即啟動,但在運行下一個代碼之前不會等待。
簽出這個例子:
//utility
function wait(ms) {
return new Promise(resolve => {
setTimeout(resolve, ms)
})
}
const AsyncFunction = async() => {
console.log('Async named function started')
// simulate execution time of 2 seconds
await wait(2000)
console.log('Async named function ended')
};
function SyncFunction() {
console.log('sync function started')
// example of async function execution within a sync function..
AsyncFunction();
// what you have done in your code:
(async() => {
console.log('Async anonymus function started')
await wait(3000)
console.log('Async anonymus function ended')
})()
// what
console.log('sync function ended.')
}
SyncFunction()
console.log('done')
注意輸出:
Async named function started
Async anonymus function started
sync function ended. // => sync function already ended
done // sync function ended and code continues execution.
Async named function ended
Async anonymus function ended
要正確等待您的async內容,您需要將整個應用程式置于異步范圍內:
//utility
function wait(ms) {
return new Promise(resolve => {
setTimeout(resolve, ms)
})
}
const AsyncFunction = async() => {
console.log('Async named function started')
// simulate execution time of 2 seconds
await wait(2000)
console.log('Async named function ended')
};
// this is now async!!
async function SyncFunction() {
console.log('sync function started')
// example of async function execution within a sync function..
await AsyncFunction();
// what you have done in your code:
await (async() => {
console.log('Async anonymus function started')
await wait(3000)
console.log('Async anonymus function ended')
})()
// what
console.log('sync function ended.')
}
SyncFunction().then(() => {
console.log('done')
}).catch(err => {
console.error('unexpected error occured..')
})
這個輸出就是我們想要的
sync function started
Async named function started
Async named function ended
Async anonymus function started
Async anonymus function ended
sync function ended.
done
希望這可以幫助您理解。
歡迎發表評論。
轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/428366.html
上一篇:ExceptionConverter:java.io.IOException:嘗試使用iText創建PDF時流已關閉
