Rename Executor to Visitor, fix GoldDigger digPage

This commit is contained in:
Michal Szczepanski 2019-07-22 23:32:51 +02:00
parent cbfdc29fde
commit 2f4fa0c474
2 changed files with 35 additions and 24 deletions

View File

@ -1,7 +1,7 @@
const fs = require('fs');
const pdf = require('pdfjs-dist');
const Extract = require('./pdf/Extract');
const Executor = require('./pdf/Executor');
const Visitor = require('./pdf/Visitor');
const Formatter = require('./pdf/Formatter');
class GoldDiggerError extends Error{
@ -16,7 +16,7 @@ class GoldDigger {
constructor(config) {
this.config = config;
this.executor = new Executor(config);
this.visitor = new Visitor(config);
this.formatter = new Formatter()
}
@ -55,7 +55,13 @@ class GoldDigger {
this.formatter.start(format, doc, metadata.info);
// read pages
for(let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
this.digPage(doc, pageNum);
const page = await doc.getPage(pageNum);
const viewport = page.getViewport({ scale: 1.0, });
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
const output = await this.digPage(page, pageNum);
const last = pageNum == doc.numPages;
this.formatter.format(format, page, output, last);
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
}
this.formatter.end(format);
}
@ -65,20 +71,16 @@ class GoldDigger {
* @param page - pdf page
* @param pageNum - page number
*/
digPage(doc, pageNum) {
const page = await doc.getPage(pageNum);
const viewport = page.getViewport({ scale: 1.0, });
async digPage(page, pageNum) {
//const text = await page.extractTextContent();
const operatorList = await page.getOperatorList();
// page.commonObjs, page.objs
// load dependencies
const dependencies = await this.loadDependencies(page, operatorList);
const opTree = this.convertOpList(operatorList);
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
const output = this.executeOpTree(opTree, page, dependencies);
const last = pageNum == doc.numPages;
this.formatter.format(format, page, output, last);
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
return output;
}
/**
@ -171,7 +173,7 @@ class GoldDigger {
* @returns {Array} PDFObject array
*/
executeOpTree(opTree, page, dependencies) {
const debug = this.executor.debug;
const debug = this.visitor.debug;
for (const opTreeElement of opTree) {
const fn = opTreeElement.fn;
const fnId = opTreeElement.fnId;
@ -179,7 +181,7 @@ class GoldDigger {
switch (fnId | 0) {
case pdf.OPS.beginText:
this.executor.beginText(args, page, dependencies);
this.visitor.beginText(args, page, dependencies);
break;
case pdf.OPS.dependency:
// Handled in `loadDependencies`, so no warning should be shown.
@ -193,23 +195,23 @@ class GoldDigger {
//this.setLeadingMoveText(args[0], args[1]);
break;
case pdf.OPS.setFont:
this.executor.setFont(args, page, dependencies);
this.visitor.setFont(args, page, dependencies);
//this.setFont(args);
break;
case pdf.OPS.showText:
this.executor.showText(args, page, dependencies);
this.visitor.showText(args, page, dependencies);
break;
case pdf.OPS.showSpacedText:
this.executor.showSpacedText(args, page, dependencies);
this.visitor.showSpacedText(args, page, dependencies);
break;
case pdf.OPS.endText:
this.executor.endText(args, page, dependencies);
this.visitor.endText(args, page, dependencies);
break;
case pdf.OPS.moveText:
this.executor.moveText(args, page, dependencies);
this.visitor.moveText(args, page, dependencies);
break;
case pdf.OPS.setTextMatrix:
this.executor.setTextMatrix(args, page, dependencies);
this.visitor.setTextMatrix(args, page, dependencies);
break;
case pdf.OPS.setCharSpacing:
//this.setCharSpacing(args[0]);
@ -334,11 +336,11 @@ class GoldDigger {
//this.paintImageMaskXObject(args[0]);
break;
case pdf.OPS.paintFormXObjectBegin:
this.executor.paintFormXObjectBegin(args, page, dependencies);
this.visitor.paintFormXObjectBegin(args, page, dependencies);
//this.paintFormXObjectBegin(args[0], args[1]);
break;
case pdf.OPS.paintFormXObjectEnd:
this.executor.paintFormXObjectEnd(args, page, dependencies);
this.visitor.paintFormXObjectEnd(args, page, dependencies);
//this.paintFormXObjectEnd();
break;
case pdf.OPS.closePath:
@ -384,7 +386,7 @@ class GoldDigger {
break;
}
}
return this.executor.objectList;
return this.visitor.objectList;
}
}

View File

@ -1,7 +1,10 @@
const Extract = require('./Extract');
const Text = require('./Text');
class Executor {
/**
* Visits pdf.OPS.* methods using pdf page data
*/
class Visitor {
constructor (config, debug) {
this.txt = new Extract.ExtractText();
@ -12,7 +15,13 @@ class Executor {
this.currentObject;
this.currentFont;
}
/**
*
* @param args
* @param page
* @param dependencies
*/
beginText(args, page, dependencies) {
if (this.debug) console.log('beginText');
if (this.skip) return;
@ -96,4 +105,4 @@ class Executor {
}
}
module.exports = Executor
module.exports = Visitor