Move visitor classes to separate files and add a universal visit method

This commit is contained in:
Michal Szczepanski 2019-07-23 00:07:23 +02:00
parent 9f9126e2c0
commit e3d25d2e77
5 changed files with 161 additions and 146 deletions

@ -180,9 +180,6 @@ class GoldDigger {
const args = opTreeElement.args; const args = opTreeElement.args;
switch (fnId | 0) { switch (fnId | 0) {
case pdf.OPS.beginText:
this.visitor.beginText(args, page, dependencies);
break;
case pdf.OPS.dependency: case pdf.OPS.dependency:
// Handled in `loadDependencies`, so no warning should be shown. // Handled in `loadDependencies`, so no warning should be shown.
break; break;
@ -194,33 +191,12 @@ class GoldDigger {
if(debug) console.log('setLeadingMoveText'); if(debug) console.log('setLeadingMoveText');
//this.setLeadingMoveText(args[0], args[1]); //this.setLeadingMoveText(args[0], args[1]);
break; break;
case pdf.OPS.setFont:
this.visitor.setFont(args, page, dependencies);
//this.setFont(args);
break;
case pdf.OPS.showText:
this.visitor.showText(args, page, dependencies);
break;
case pdf.OPS.showSpacedText:
this.visitor.showSpacedText(args, page, dependencies);
break;
case pdf.OPS.endText:
this.visitor.endText(args, page, dependencies);
break;
case pdf.OPS.moveText:
this.visitor.moveText(args, page, dependencies);
break;
case pdf.OPS.setTextMatrix:
this.visitor.setTextMatrix(args, page, dependencies);
break;
case pdf.OPS.setCharSpacing: case pdf.OPS.setCharSpacing:
//this.setCharSpacing(args[0]); //this.setCharSpacing(args[0]);
if (debug) console.log('setCharSpacing'); if (debug) console.log('setCharSpacing');
if (config.skip) break;
break; break;
case pdf.OPS.setWordSpacing: case pdf.OPS.setWordSpacing:
if (debug) console.log('setWordSpacing'); if (debug) console.log('setWordSpacing');
if (config.skip) break;
//this.setWordSpacing(args[0]); //this.setWordSpacing(args[0]);
break; break;
case pdf.OPS.setHScale: case pdf.OPS.setHScale:
@ -335,14 +311,6 @@ class GoldDigger {
if(debug) console.log('paintImageMaskXObject'); if(debug) console.log('paintImageMaskXObject');
//this.paintImageMaskXObject(args[0]); //this.paintImageMaskXObject(args[0]);
break; break;
case pdf.OPS.paintFormXObjectBegin:
this.visitor.paintFormXObjectBegin(args, page, dependencies);
//this.paintFormXObjectBegin(args[0], args[1]);
break;
case pdf.OPS.paintFormXObjectEnd:
this.visitor.paintFormXObjectEnd(args, page, dependencies);
//this.paintFormXObjectEnd();
break;
case pdf.OPS.closePath: case pdf.OPS.closePath:
if(debug) console.log('closePath'); if(debug) console.log('closePath');
//this.closePath(); //this.closePath();
@ -382,7 +350,7 @@ class GoldDigger {
//this.group(opTreeElement.items); //this.group(opTreeElement.items);
break; break;
default: default:
console.warn(`Unimplemented operator ${fn}`); this.visitor.visit(fn, args, page, dependencies);
break; break;
} }
} }

@ -4,6 +4,7 @@ class Formatter {
this.formatters = { this.formatters = {
json: new FormatterJSON(), json: new FormatterJSON(),
xml: new FormatterXML(), xml: new FormatterXML(),
text: new FormatterText(),
} }
} }
@ -23,6 +24,21 @@ class Formatter {
} }
} }
/**
 * Formatter for plain-text output.
 *
 * Every hook is deliberately empty: this class produces no output of its own
 * and only satisfies the start/format/end interface shared by the other
 * formatters.
 * NOTE(review): presumably text output is printed elsewhere while the page is
 * being processed - confirm against the caller.
 */
class FormatterText {
  /**
   * Called before any page is formatted. No-op.
   * @param {*} doc - document being processed (unused)
   * @param {*} metadata - document metadata (unused)
   */
  start(doc, metadata) {}

  /**
   * Called for a processed page. No-op.
   * @param {*} page - page being formatted (unused)
   * @param {*} data - data extracted for the page (unused)
   * @param {*} last - last-page indicator (unused)
   */
  format(page, data, last) {}

  /**
   * Called after the last page. No-op.
   */
  end() {}
}
class FormatterJSON { class FormatterJSON {
start(doc, metadata) { start(doc, metadata) {
const meta = JSON.stringify(metadata) const meta = JSON.stringify(metadata)

@ -1,5 +1,7 @@
const Extract = require('./Extract'); const VisitorText = require('./visitor/VisitorText');
const Text = require('./Text'); const VisitorXObject = require('./visitor/VisitorXObject');
const FN_TEXT = ['beginText', 'setFont', 'showText', 'showSpacedText', 'endText', 'moveText'];
const FN_XOBJECT = ['setTextMatrix', 'paintFormXObjectBegin', 'paintFormXObjectEnd'];
/** /**
* Visits pdf.OPT.* methods using pdf page data * Visits pdf.OPT.* methods using pdf page data
@ -7,122 +9,22 @@ const Text = require('./Text');
/**
 * Dispatches pdf.OPS.* operators to the specialised visitors.
 *
 * Operators named in FN_TEXT are forwarded to the text visitor, operators
 * named in FN_XOBJECT to the XObject visitor; anything else is reported as
 * unimplemented.
 */
class Visitor {
  /**
   * @param {Object} config - shared configuration; its `skip` flag is reset
   *   here and the same object is handed to both sub-visitors
   * @param {boolean} debug - debug flag forwarded to the sub-visitors
   */
  constructor (config, debug) {
    this.objectList = [];
    this.config = config;
    this.config.skip = false;
    this.txt = new VisitorText(config, debug, this.objectList);
    this.xobject = new VisitorXObject(config, debug, this.objectList);
    // NOTE(review): the `debug` argument is only forwarded to the
    // sub-visitors; this flag stays false exactly as before.
    this.debug = false;
  }

  /**
   * Routes a single operator to the visitor that implements it.
   *
   * @param {string} fname - operator name, e.g. 'showText'
   * @param {Array} args - operator arguments
   * @param {*} page - page the operator belongs to
   * @param {*} dependencies - dependencies forwarded to the handler
   */
  visit(fname, args, page, dependencies) {
    if (FN_TEXT.indexOf(fname) > -1) {
      this.txt[fname](args, page, dependencies);
    } else if (FN_XOBJECT.indexOf(fname) > -1) {
      this.xobject[fname](args, page, dependencies);
    } else {
      // The parameter is `fname`; referencing an undeclared `fn` here would
      // throw a ReferenceError instead of emitting the warning.
      console.warn(`Unimplemented operator ${fname}`);
    }
  }
}

@ -0,0 +1,90 @@
const Extract = require('./Extract');
const Text = require('./Text');
/**
 * Visitor for the text operators (pdf.OPS.beginText ... pdf.OPS.moveText).
 *
 * Keeps track of the text object currently being built and of the current
 * font, and appends finished text objects to the shared object list.
 * Every handler is a no-op while `config.skip` is set.
 */
class VisitorText {
  /**
   * @param {Object} config - shared configuration (`skip` and `output` are read)
   * @param {boolean} debug - when truthy, each handler logs its name
   * @param {Array} objectList - list that receives every started text object
   */
  constructor(config, debug, objectList) {
    this.txt = new Extract.ExtractText();
    this.config = config;
    this.debug = debug;
    this.objectList = objectList;
    // No text object is open until beginText runs; endText resets it to null.
    this.currentObject = null;
    this.currentFont = undefined;
  }

  /**
   * pdf.OPS.beginText
   * Starts a new text object with an initial line and records it.
   */
  beginText(args, page, dependencies) {
    if (this.debug) console.log('beginText');
    if (this.config.skip) return;
    this.currentObject = new Text.TextObject();
    this.currentObject.newLine();
    this.objectList.push(this.currentObject);
  }

  /**
   * pdf.OPS.setFont
   * Resolves the font for the given arguments and remembers it.
   */
  setFont(args, page, dependencies) {
    if (this.debug) console.log('setFont');
    if (this.config.skip) return;
    this.currentFont = this.txt.getFont(args, page, dependencies);
  }

  /**
   * pdf.OPS.showText
   */
  showText(args, page, dependencies) {
    if (this.debug) console.log("showText");
    this.appendShownText(args);
  }

  /**
   * pdf.OPS.showSpacedText
   */
  showSpacedText(args, page, dependencies) {
    if (this.debug) console.log("showSpacedText");
    this.appendShownText(args);
  }

  /**
   * pdf.OPS.endText
   * Closes the current text object.
   */
  endText(args, page, dependencies) {
    if (this.debug) console.log('endText');
    if (this.config.skip) return;
    this.currentObject = null;
  }

  /**
   * pdf.OPS.moveText
   * Moves the text position by (args[0], args[1]); starts a new line when the
   * current line reports the y offset as a line break.
   */
  moveText(args, page, dependencies) {
    if (this.debug) console.log('moveText');
    if (this.config.skip) return;
    // Nothing to move when no text object is open (e.g. its beginText was
    // skipped); dereferencing it would throw.
    if (this.currentObject == null) return;
    let el = this.currentObject.getLine();
    const x = args[0];
    const y = args[1];
    if (el.isNewLine(y)) {
      if (this.config.output === 'text') el.printText();
      el = this.currentObject.newLine();
    }
    // create new text element always after new line
    const el2 = el.newText();
    el2.x = this.currentObject.x += x;
    el2.y = this.currentObject.y += y;
    // assign to calculate bounding box
    el.setPosition(this.currentObject.x, this.currentObject.y);
  }

  /**
   * Shared implementation of showText / showSpacedText: applies the current
   * font to the current line and stores the extracted text, followed by a
   * single space, in the line's current text element.
   *
   * @param {Array} args - operator arguments; args[0] holds the glyph data
   */
  appendShownText(args) {
    if (this.config.skip) return;
    // Ignore text shown while no text object is open instead of crashing.
    if (this.currentObject == null) return;
    const line = this.currentObject.getLine();
    line.setFont(this.currentFont);
    const textEl = line.getText();
    // first text element workaround
    textEl.setText(this.txt.getText(args[0], textEl) + " ");
  }
}

@ -0,0 +1,39 @@
/**
 * Visitor for the operators routed to it by the dispatcher: setTextMatrix and
 * the form XObject begin/end markers.
 *
 * When `config.paintFormXObject` is falsy, everything between a
 * paintFormXObjectBegin and its matching paintFormXObjectEnd is skipped by
 * raising the shared `config.skip` flag.
 */
class VisitorXObject {
  /**
   * @param {Object} config - shared configuration; `paintFormXObject` is read
   *   and `skip` is written
   * @param {boolean} debug - when truthy, each handler logs its name
   * @param {Array} objectList - shared list of extracted objects
   */
  constructor(config, debug, objectList) {
    this.config = config;
    this.debug = debug;
    this.objectList = objectList;
    // Number of currently open form XObjects that are being skipped.
    this.skipDepth = 0;
  }

  /**
   * pdf.OPS.setTextMatrix
   * Currently a no-op apart from the debug log: the earlier matrix handling
   * relied on `this.currentObject`, which this class does not define.
   */
  setTextMatrix(args, page, dependencies) {
    if (this.debug) console.log('setTextMatrix');
    if (this.config.skip) return;
  }

  /**
   * pdf.OPS.paintFormXObjectBegin
   * Starts skipping when form XObjects are not to be painted.
   */
  paintFormXObjectBegin(args, page, dependencies) {
    if (this.debug) console.log('paintFormXObjectBegin');
    if (!this.config.paintFormXObject) {
      this.skipDepth += 1;
      this.config.skip = true;
    }
  }

  /**
   * pdf.OPS.paintFormXObjectEnd
   * Stops skipping only once the outermost skipped form has ended, so the end
   * of a nested form does not resume extraction inside its skipped parent.
   */
  paintFormXObjectEnd(args, page, dependencies) {
    if (this.debug) console.log('paintFormXObjectEnd');
    if (this.skipDepth > 0) {
      this.skipDepth -= 1;
    }
    this.config.skip = this.skipDepth > 0;
  }
}