Rename Executor to Visitor, fix GoldDigger digPage
This commit is contained in:
parent
cbfdc29fde
commit
2f4fa0c474
@ -1,7 +1,7 @@
|
||||
const fs = require('fs');
|
||||
const pdf = require('pdfjs-dist');
|
||||
const Extract = require('./pdf/Extract');
|
||||
const Executor = require('./pdf/Executor');
|
||||
const Visitor = require('./pdf/Visitor');
|
||||
const Formatter = require('./pdf/Formatter');
|
||||
|
||||
class GoldDiggerError extends Error{
|
||||
@ -16,7 +16,7 @@ class GoldDigger {
|
||||
|
||||
constructor(config) {
|
||||
this.config = config;
|
||||
this.executor = new Executor(config);
|
||||
this.visitor = new Visitor(config);
|
||||
this.formatter = new Formatter()
|
||||
}
|
||||
|
||||
@ -55,7 +55,13 @@ class GoldDigger {
|
||||
this.formatter.start(format, doc, metadata.info);
|
||||
// read pages
|
||||
for(let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
|
||||
this.digPage(doc, pageNum);
|
||||
const page = await doc.getPage(pageNum);
|
||||
const viewport = page.getViewport({ scale: 1.0, });
|
||||
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
|
||||
const output = await this.digPage(page, pageNum);
|
||||
const last = pageNum == doc.numPages;
|
||||
this.formatter.format(format, page, output, last);
|
||||
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
|
||||
}
|
||||
this.formatter.end(format);
|
||||
}
|
||||
@ -65,20 +71,16 @@ class GoldDigger {
|
||||
* @param page - pdf page (as obtained from doc.getPage(pageNum))
|
||||
* @param pageNum - page number
|
||||
*/
|
||||
digPage(doc, pageNum) {
|
||||
const page = await doc.getPage(pageNum);
|
||||
const viewport = page.getViewport({ scale: 1.0, });
|
||||
async digPage(page, pageNum) {
|
||||
|
||||
//const text = await page.extractTextContent();
|
||||
const operatorList = await page.getOperatorList();
|
||||
// page.commonObjs, page.objs
|
||||
// load dependencies
|
||||
const dependencies = await this.loadDependencies(page, operatorList);
|
||||
const opTree = this.convertOpList(operatorList);
|
||||
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
|
||||
const output = this.executeOpTree(opTree, page, dependencies);
|
||||
const last = pageNum == doc.numPages;
|
||||
this.formatter.format(format, page, output, last);
|
||||
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -171,7 +173,7 @@ class GoldDigger {
|
||||
* @returns {Array} PDFObject array
|
||||
*/
|
||||
executeOpTree(opTree, page, dependencies) {
|
||||
const debug = this.executor.debug;
|
||||
const debug = this.visitor.debug;
|
||||
for (const opTreeElement of opTree) {
|
||||
const fn = opTreeElement.fn;
|
||||
const fnId = opTreeElement.fnId;
|
||||
@ -179,7 +181,7 @@ class GoldDigger {
|
||||
|
||||
switch (fnId | 0) {
|
||||
case pdf.OPS.beginText:
|
||||
this.executor.beginText(args, page, dependencies);
|
||||
this.visitor.beginText(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.dependency:
|
||||
// Handled in `loadDependencies`, so no warning should be shown.
|
||||
@ -193,23 +195,23 @@ class GoldDigger {
|
||||
//this.setLeadingMoveText(args[0], args[1]);
|
||||
break;
|
||||
case pdf.OPS.setFont:
|
||||
this.executor.setFont(args, page, dependencies);
|
||||
this.visitor.setFont(args, page, dependencies);
|
||||
//this.setFont(args);
|
||||
break;
|
||||
case pdf.OPS.showText:
|
||||
this.executor.showText(args, page, dependencies);
|
||||
this.visitor.showText(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.showSpacedText:
|
||||
this.executor.showSpacedText(args, page, dependencies);
|
||||
this.visitor.showSpacedText(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.endText:
|
||||
this.executor.endText(args, page, dependencies);
|
||||
this.visitor.endText(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.moveText:
|
||||
this.executor.moveText(args, page, dependencies);
|
||||
this.visitor.moveText(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.setTextMatrix:
|
||||
this.executor.setTextMatrix(args, page, dependencies);
|
||||
this.visitor.setTextMatrix(args, page, dependencies);
|
||||
break;
|
||||
case pdf.OPS.setCharSpacing:
|
||||
//this.setCharSpacing(args[0]);
|
||||
@ -334,11 +336,11 @@ class GoldDigger {
|
||||
//this.paintImageMaskXObject(args[0]);
|
||||
break;
|
||||
case pdf.OPS.paintFormXObjectBegin:
|
||||
this.executor.paintFormXObjectBegin(args, page, dependencies);
|
||||
this.visitor.paintFormXObjectBegin(args, page, dependencies);
|
||||
//this.paintFormXObjectBegin(args[0], args[1]);
|
||||
break;
|
||||
case pdf.OPS.paintFormXObjectEnd:
|
||||
this.executor.paintFormXObjectEnd(args, page, dependencies);
|
||||
this.visitor.paintFormXObjectEnd(args, page, dependencies);
|
||||
//this.paintFormXObjectEnd();
|
||||
break;
|
||||
case pdf.OPS.closePath:
|
||||
@ -384,7 +386,7 @@ class GoldDigger {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return this.executor.objectList;
|
||||
return this.visitor.objectList;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,10 @@
|
||||
const Extract = require('./Extract');
|
||||
const Text = require('./Text');
|
||||
|
||||
class Executor {
|
||||
/**
|
||||
* Visits pdf.OPS.* methods using pdf page data
|
||||
*/
|
||||
class Visitor {
|
||||
|
||||
constructor (config, debug) {
|
||||
this.txt = new Extract.ExtractText();
|
||||
@ -13,6 +16,12 @@ class Executor {
|
||||
this.currentFont;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param args
|
||||
* @param page
|
||||
* @param dependencies
|
||||
*/
|
||||
beginText(args, page, dependencies) {
|
||||
if (this.debug) console.log('beginText');
|
||||
if (this.skip) return;
|
||||
@ -96,4 +105,4 @@ class Executor {
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = Executor
|
||||
module.exports = Visitor
|
Loading…
Reference in New Issue
Block a user