Rename Executor to Visitor, fix GoldDigger digPage
This commit is contained in:
parent
cbfdc29fde
commit
2f4fa0c474
|
@ -1,7 +1,7 @@
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const pdf = require('pdfjs-dist');
|
const pdf = require('pdfjs-dist');
|
||||||
const Extract = require('./pdf/Extract');
|
const Extract = require('./pdf/Extract');
|
||||||
const Executor = require('./pdf/Executor');
|
const Visitor = require('./pdf/Visitor');
|
||||||
const Formatter = require('./pdf/Formatter');
|
const Formatter = require('./pdf/Formatter');
|
||||||
|
|
||||||
class GoldDiggerError extends Error{
|
class GoldDiggerError extends Error{
|
||||||
|
@ -16,7 +16,7 @@ class GoldDigger {
|
||||||
|
|
||||||
constructor(config) {
|
constructor(config) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.executor = new Executor(config);
|
this.visitor = new Visitor(config);
|
||||||
this.formatter = new Formatter()
|
this.formatter = new Formatter()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +55,13 @@ class GoldDigger {
|
||||||
this.formatter.start(format, doc, metadata.info);
|
this.formatter.start(format, doc, metadata.info);
|
||||||
// read pages
|
// read pages
|
||||||
for(let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
|
for(let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
|
||||||
this.digPage(doc, pageNum);
|
const page = await doc.getPage(pageNum);
|
||||||
|
const viewport = page.getViewport({ scale: 1.0, });
|
||||||
|
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
|
||||||
|
const output = await this.digPage(page, pageNum);
|
||||||
|
const last = pageNum == doc.numPages;
|
||||||
|
this.formatter.format(format, page, output, last);
|
||||||
|
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
|
||||||
}
|
}
|
||||||
this.formatter.end(format);
|
this.formatter.end(format);
|
||||||
}
|
}
|
||||||
|
@ -65,20 +71,16 @@ class GoldDigger {
|
||||||
* @param doc - pdf document
|
* @param doc - pdf document
|
||||||
* @param pageNum - page number
|
* @param pageNum - page number
|
||||||
*/
|
*/
|
||||||
digPage(doc, pageNum) {
|
async digPage(page, pageNum) {
|
||||||
const page = await doc.getPage(pageNum);
|
|
||||||
const viewport = page.getViewport({ scale: 1.0, });
|
|
||||||
//const text = await page.extractTextContent();
|
//const text = await page.extractTextContent();
|
||||||
const operatorList = await page.getOperatorList();
|
const operatorList = await page.getOperatorList();
|
||||||
// page.commonObjs, page.objs
|
// page.commonObjs, page.objs
|
||||||
// load dependencies
|
// load dependencies
|
||||||
const dependencies = await this.loadDependencies(page, operatorList);
|
const dependencies = await this.loadDependencies(page, operatorList);
|
||||||
const opTree = this.convertOpList(operatorList);
|
const opTree = this.convertOpList(operatorList);
|
||||||
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
|
|
||||||
const output = this.executeOpTree(opTree, page, dependencies);
|
const output = this.executeOpTree(opTree, page, dependencies);
|
||||||
const last = pageNum == doc.numPages;
|
return output;
|
||||||
this.formatter.format(format, page, output, last);
|
|
||||||
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -171,7 +173,7 @@ class GoldDigger {
|
||||||
* @returns {Array} PDFObject array
|
* @returns {Array} PDFObject array
|
||||||
*/
|
*/
|
||||||
executeOpTree(opTree, page, dependencies) {
|
executeOpTree(opTree, page, dependencies) {
|
||||||
const debug = this.executor.debug;
|
const debug = this.visitor.debug;
|
||||||
for (const opTreeElement of opTree) {
|
for (const opTreeElement of opTree) {
|
||||||
const fn = opTreeElement.fn;
|
const fn = opTreeElement.fn;
|
||||||
const fnId = opTreeElement.fnId;
|
const fnId = opTreeElement.fnId;
|
||||||
|
@ -179,7 +181,7 @@ class GoldDigger {
|
||||||
|
|
||||||
switch (fnId | 0) {
|
switch (fnId | 0) {
|
||||||
case pdf.OPS.beginText:
|
case pdf.OPS.beginText:
|
||||||
this.executor.beginText(args, page, dependencies);
|
this.visitor.beginText(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.dependency:
|
case pdf.OPS.dependency:
|
||||||
// Handled in `loadDependencies`, so no warning should be shown.
|
// Handled in `loadDependencies`, so no warning should be shown.
|
||||||
|
@ -193,23 +195,23 @@ class GoldDigger {
|
||||||
//this.setLeadingMoveText(args[0], args[1]);
|
//this.setLeadingMoveText(args[0], args[1]);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.setFont:
|
case pdf.OPS.setFont:
|
||||||
this.executor.setFont(args, page, dependencies);
|
this.visitor.setFont(args, page, dependencies);
|
||||||
//this.setFont(args);
|
//this.setFont(args);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.showText:
|
case pdf.OPS.showText:
|
||||||
this.executor.showText(args, page, dependencies);
|
this.visitor.showText(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.showSpacedText:
|
case pdf.OPS.showSpacedText:
|
||||||
this.executor.showSpacedText(args, page, dependencies);
|
this.visitor.showSpacedText(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.endText:
|
case pdf.OPS.endText:
|
||||||
this.executor.endText(args, page, dependencies);
|
this.visitor.endText(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.moveText:
|
case pdf.OPS.moveText:
|
||||||
this.executor.moveText(args, page, dependencies);
|
this.visitor.moveText(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.setTextMatrix:
|
case pdf.OPS.setTextMatrix:
|
||||||
this.executor.setTextMatrix(args, page, dependencies);
|
this.visitor.setTextMatrix(args, page, dependencies);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.setCharSpacing:
|
case pdf.OPS.setCharSpacing:
|
||||||
//this.setCharSpacing(args[0]);
|
//this.setCharSpacing(args[0]);
|
||||||
|
@ -334,11 +336,11 @@ class GoldDigger {
|
||||||
//this.paintImageMaskXObject(args[0]);
|
//this.paintImageMaskXObject(args[0]);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.paintFormXObjectBegin:
|
case pdf.OPS.paintFormXObjectBegin:
|
||||||
this.executor.paintFormXObjectBegin(args, page, dependencies);
|
this.visitor.paintFormXObjectBegin(args, page, dependencies);
|
||||||
//this.paintFormXObjectBegin(args[0], args[1]);
|
//this.paintFormXObjectBegin(args[0], args[1]);
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.paintFormXObjectEnd:
|
case pdf.OPS.paintFormXObjectEnd:
|
||||||
this.executor.paintFormXObjectEnd(args, page, dependencies);
|
this.visitor.paintFormXObjectEnd(args, page, dependencies);
|
||||||
//this.paintFormXObjectEnd();
|
//this.paintFormXObjectEnd();
|
||||||
break;
|
break;
|
||||||
case pdf.OPS.closePath:
|
case pdf.OPS.closePath:
|
||||||
|
@ -384,7 +386,7 @@ class GoldDigger {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return this.executor.objectList;
|
return this.visitor.objectList;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
const Extract = require('./Extract');
|
const Extract = require('./Extract');
|
||||||
const Text = require('./Text');
|
const Text = require('./Text');
|
||||||
|
|
||||||
class Executor {
|
/**
|
||||||
|
* Visits pdf.OPS.* methods using pdf page data
|
||||||
|
*/
|
||||||
|
class Visitor {
|
||||||
|
|
||||||
constructor (config, debug) {
|
constructor (config, debug) {
|
||||||
this.txt = new Extract.ExtractText();
|
this.txt = new Extract.ExtractText();
|
||||||
|
@ -13,6 +16,12 @@ class Executor {
|
||||||
this.currentFont;
|
this.currentFont;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param args
|
||||||
|
* @param page
|
||||||
|
* @param dependencies
|
||||||
|
*/
|
||||||
beginText(args, page, dependencies) {
|
beginText(args, page, dependencies) {
|
||||||
if (this.debug) console.log('beginText');
|
if (this.debug) console.log('beginText');
|
||||||
if (this.skip) return;
|
if (this.skip) return;
|
||||||
|
@ -96,4 +105,4 @@ class Executor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = Executor
|
module.exports = Visitor
|
Loading…
Reference in New Issue