Add eslint standard with small modifications

- semi always
- comma-dangle always-multiline
This commit is contained in:
Michal Szczepanski 2019-07-28 09:40:39 +02:00
parent 4fd9b6024c
commit dea317eda8
25 changed files with 311 additions and 282 deletions

11
.eslintrc.json Normal file

@ -0,0 +1,11 @@
{
"extends": "standard",
"parserOptions": {
"sourceType": "module",
"ecmaVersion": 2017
},
"rules": {
"semi": [2, "always"],
"comma-dangle": ["error", "always-multiline"]
}
}

@ -24,11 +24,18 @@
"pdf-text-extract"
],
"scripts": {
"doc": "esdoc"
"doc": "esdoc",
"lint": "eslint src"
},
"devDependencies": {
"esdoc": "^1.1.0",
"esdoc-node": "^1.0.4",
"esdoc-standard-plugin": "^1.0.0"
"esdoc-standard-plugin": "^1.0.0",
"eslint": "^6.1.0",
"eslint-config-standard": "^13.0.1",
"eslint-plugin-import": "^2.18.2",
"eslint-plugin-node": "^9.1.0",
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-standard": "^4.0.0"
}
}

@ -1,7 +1,5 @@
const fs = require('fs');
const pdf = require('pdfjs-dist');
const util = require('pdfjs-dist/lib/shared/util');
const Extract = require('./pdf/Extract');
const Visitor = require('./pdf/Visitor');
const Formatter = require('./pdf/Formatter');
const FileManager = require('./pdf/FileManager');
@ -9,7 +7,7 @@ const FileManager = require('./pdf/FileManager');
/**
* Generic error
*/
class GoldDiggerError extends Error{
class GoldDiggerError extends Error {
}
@ -18,12 +16,11 @@ class GoldDiggerError extends Error{
* Code based on pdf.js SVGGraphics
*/
class GoldDigger {
/**
* Constructor
* @param {object} config - configuration
*/
constructor(config) {
constructor (config) {
this.config = config;
/**
* @type {Formatter}
@ -34,41 +31,41 @@ class GoldDigger {
/**
* Checks that the input file exists, loads it into memory and returns the PDFDocument
*/
async getDocument() {
async getDocument () {
if (!fs.existsSync(this.config.input)) {
throw new GoldDiggerError(`File not exists ${this.config.input}`);
}
if(this.config.debug) console.log('Reading pdf');
if (this.config.debug) console.log('Reading pdf');
// read file
const data = fs.readFileSync(this.config.input);
if(this.config.debug) console.log(data.length);
if (this.config.debug) console.log(data.length);
const doc = await pdf.getDocument({
data: data,
}).promise;
return doc
return doc;
}
/**
* Main method for pdf-gold-digger
* @returns {Promise<void>}
*/
async dig() {
async dig () {
const doc = await this.getDocument();
const debug = this.config.debug;
if(debug) console.log(`Pages : ${doc.numPages}`);
if (debug) console.log(`Pages : ${doc.numPages}`);
// prepare formatting
const format = this.config.format;
const metadata = await doc.getMetadata();
this.formatter.start(format, doc, metadata.info);
// read pages
for(let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
const pageData = await doc.getPage(pageNum);
const viewport = pageData.getViewport({ scale: 1.0, });
if(debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
const viewport = pageData.getViewport({ scale: 1.0 });
if (debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
const output = await this.digPage(pageData, pageNum);
const last = pageNum == doc.numPages;
const last = pageNum === doc.numPages;
this.formatter.format(format, pageData, output, last);
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
if (debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`);
}
this.formatter.end(format);
// save to file
@ -81,9 +78,8 @@ class GoldDigger {
* @param pageData - pdf page
* @param {number} pageNum - page number
*/
async digPage(pageData, pageNum) {
//const text = await page.extractTextContent();
async digPage (pageData, pageNum) {
// const text = await page.extractTextContent();
const operatorList = await pageData.getOperatorList();
// page.commonObjs, page.objs
// load dependencies
@ -100,7 +96,7 @@ class GoldDigger {
* @param operatorList
* @returns {Promise<Array>}
*/
async loadDependencies(pageData, operatorList) {
async loadDependencies (pageData, operatorList) {
const fnArray = operatorList.fnArray;
const argsArray = operatorList.argsArray;
const out = [];
@ -120,9 +116,9 @@ class GoldDigger {
/**
* (SVGGraphics)
*/
operatorMapping() {
const mapping = {}
for(var op in pdf.OPS) {
operatorMapping () {
const mapping = {};
for (const op in pdf.OPS) {
mapping[pdf.OPS[op]] = op;
}
return mapping;
@ -133,7 +129,7 @@ class GoldDigger {
* @param operatorList
* @returns {*}
*/
convertOpList(operatorList) {
convertOpList (operatorList) {
const operatorIdMapping = this.operatorMapping();
const argsArray = operatorList.argsArray;
const fnArray = operatorList.fnArray;
@ -141,9 +137,9 @@ class GoldDigger {
for (let i = 0, ii = fnArray.length; i < ii; i++) {
const fnId = fnArray[i];
opList.push({
'fnId': fnId,
'fn': operatorIdMapping[fnId],
'args': argsArray[i],
fnId,
fn: operatorIdMapping[fnId],
args: argsArray[i],
});
}
return this.opListToTree(opList);
@ -154,13 +150,13 @@ class GoldDigger {
* @param opList
* @returns {Array}
*/
opListToTree(opList) {
opListToTree (opList) {
let opTree = [];
const tmp = [];
for (const opListElement of opList) {
if (opListElement.fn === 'save') {
opTree.push({ 'fnId': 92, 'fn': 'group', 'items': [], });
opTree.push({ fnId: 92, fn: 'group', items: [] });
tmp.push(opTree);
opTree = opTree[opTree.length - 1].items;
continue;
@ -181,7 +177,7 @@ class GoldDigger {
* @param opTree - pdf tree of information
* @param {Visitor} visitor - class for parsing incoming tags
*/
executeOpTree(opTree, visitor) {
executeOpTree (opTree, visitor) {
const debug = visitor.debug;
for (const opTreeElement of opTree) {
const fn = opTreeElement.fn;
@ -198,124 +194,124 @@ class GoldDigger {
break;
case pdf.OPS.setLineWidth:
if (debug) console.log('setLineWidth');
//this.setLineWidth(args[0]);
// this.setLineWidth(args[0]);
break;
case pdf.OPS.setLineJoin:
if (debug) console.log('setLineJoin');
//this.setLineJoin(args[0]);
// this.setLineJoin(args[0]);
break;
case pdf.OPS.setLineCap:
if (debug) console.log('setLineCap');
//this.setLineCap(args[0]);
// this.setLineCap(args[0]);
break;
case pdf.OPS.setMiterLimit:
if(debug) console.log('setMiterLimit');
//this.setMiterLimit(args[0]);
if (debug) console.log('setMiterLimit');
// this.setMiterLimit(args[0]);
break;
case pdf.OPS.setFillRGBColor:
if(debug) console.log('setFillRGBColor');
//this.setFillRGBColor(args[0], args[1], args[2]);
if (debug) console.log('setFillRGBColor');
// this.setFillRGBColor(args[0], args[1], args[2]);
break;
case pdf.OPS.setStrokeRGBColor:
if(debug) console.log('setStrokeRGBColor');
//this.setStrokeRGBColor(args[0], args[1], args[2]);
if (debug) console.log('setStrokeRGBColor');
// this.setStrokeRGBColor(args[0], args[1], args[2]);
break;
case pdf.OPS.setStrokeColorN:
if(debug) console.log('setStrokeColorN');
//this.setStrokeColorN(args);
if (debug) console.log('setStrokeColorN');
// this.setStrokeColorN(args);
break;
case pdf.OPS.setFillColorN:
if(debug) console.log('setFillColorN');
//this.setFillColorN(args);
if (debug) console.log('setFillColorN');
// this.setFillColorN(args);
break;
case pdf.OPS.shadingFill:
if(debug) console.log('shadingFill');
//this.shadingFill(args[0]);
if (debug) console.log('shadingFill');
// this.shadingFill(args[0]);
break;
case pdf.OPS.setDash:
if(debug) console.log('setDash');
//this.setDash(args[0], args[1]);
if (debug) console.log('setDash');
// this.setDash(args[0], args[1]);
break;
case pdf.OPS.setRenderingIntent:
if(debug) console.log('setRenderingIntent');
//this.setRenderingIntent(args[0]);
if (debug) console.log('setRenderingIntent');
// this.setRenderingIntent(args[0]);
break;
case pdf.OPS.setFlatness:
if(debug) console.log('setFlatness');
//this.setFlatness(args[0]);
if (debug) console.log('setFlatness');
// this.setFlatness(args[0]);
break;
case pdf.OPS.setGState:
if(debug) console.log('setGState');
//this.setGState(args[0]);
if (debug) console.log('setGState');
// this.setGState(args[0]);
break;
case pdf.OPS.fill:
if(debug) console.log('fill');
//this.fill();
if (debug) console.log('fill');
// this.fill();
break;
case pdf.OPS.eoFill:
if(debug) console.log('eoFill');
//this.eoFill();
if (debug) console.log('eoFill');
// this.eoFill();
break;
case pdf.OPS.stroke:
if(debug) console.log('stroke');
//this.stroke();
if (debug) console.log('stroke');
// this.stroke();
break;
case pdf.OPS.fillStroke:
if(debug) console.log('fillStroke');
//this.fillStroke();
if (debug) console.log('fillStroke');
// this.fillStroke();
break;
case pdf.OPS.eoFillStroke:
if(debug) console.log('eoFillStroke');
//this.eoFillStroke();
if (debug) console.log('eoFillStroke');
// this.eoFillStroke();
break;
case pdf.OPS.clip:
if(debug) console.log('clip');
//this.clip('nonzero');
if (debug) console.log('clip');
// this.clip('nonzero');
break;
case pdf.OPS.eoClip:
if(debug) console.log('eoClip');
//this.clip('evenodd');
if (debug) console.log('eoClip');
// this.clip('evenodd');
break;
case pdf.OPS.paintSolidColorImageMask:
if(debug) console.log('paintSolidColorImageMask');
//this.paintSolidColorImageMask();
if (debug) console.log('paintSolidColorImageMask');
// this.paintSolidColorImageMask();
break;
case pdf.OPS.paintImageMaskXObject:
if(debug) console.log('paintImageMaskXObject');
//this.paintImageMaskXObject(args[0]);
if (debug) console.log('paintImageMaskXObject');
// this.paintImageMaskXObject(args[0]);
break;
case pdf.OPS.closePath:
if(debug) console.log('closePath');
//this.closePath();
if (debug) console.log('closePath');
// this.closePath();
break;
case pdf.OPS.closeStroke:
if(debug) console.log('closeStroke');
//this.closeStroke();
if (debug) console.log('closeStroke');
// this.closeStroke();
break;
case pdf.OPS.closeFillStroke:
if(debug) console.log('closeFillStroke');
//this.closeFillStroke();
if (debug) console.log('closeFillStroke');
// this.closeFillStroke();
break;
case pdf.OPS.closeEOFillStroke:
if(debug) console.log('closeEOFillStroke');
//this.closeEOFillStroke();
if (debug) console.log('closeEOFillStroke');
// this.closeEOFillStroke();
break;
case pdf.OPS.transform:
if(debug) console.log('transform');
//this.transform(args[0], args[1], args[2], args[3], args[4], args[5]);
if (debug) console.log('transform');
// this.transform(args[0], args[1], args[2], args[3], args[4], args[5]);
break;
case pdf.OPS.constructPath:
if(debug) console.log('constructPath');
//this.constructPath(args[0], args[1]);
if (debug) console.log('constructPath');
// this.constructPath(args[0], args[1]);
break;
case pdf.OPS.endPath:
if(debug) console.log('endPath');
//this.endPath();
if (debug) console.log('endPath');
// this.endPath();
break;
case 92:
if(debug) console.log('executeOpTree');
if (debug) console.log('executeOpTree');
this.executeOpTree(opTreeElement.items, visitor);
//this.group(opTreeElement.items);
// this.group(opTreeElement.items);
break;
default:
visitor.visit(fn, args, visitor);
@ -325,4 +321,4 @@ class GoldDigger {
}
}
module.exports = GoldDigger
module.exports = GoldDigger;

@ -5,4 +5,4 @@ const FONT_IDENTITY_MATRIX = [0.001, 0, 0, 0.001, 0, 0];
module.exports = {
IDENTITY_MATRIX,
FONT_IDENTITY_MATRIX,
}
};

@ -11,24 +11,24 @@ class ExtractText {
* @param {array} glyphs - glyphs from pdf.OPS.showText, pdf.OPS.showSpacedText
* @param {PdfPage} page - pdf page object
*/
showText(glyphs, page) {
showText (glyphs, page) {
// MOVED from VisitorText
let lineList = page.currentObject.getLine();
// -i ../../github.com/pdf.js/test/pdfs/ZapfDingbats.pdf -f text null pointer
const line = new Model.TextFont();
line.font = page.currentFont;
// copy from previous line
const lastLine = lineList.getLastFontText()
if(lastLine) {
const lastLine = lineList.getLastFontText();
if (lastLine) {
line.wordSpacing = lastLine.wordSpacing;
line.charSpacing = lastLine.charSpacing;
}
let startX = page.x;
let startY = page.y;
const startX = page.x;
const startY = page.y;
// END
let partial = "";
let partial = '';
let x = 0;
for(const glyph of glyphs) {
for (const glyph of glyphs) {
if (glyph === null) {
// Word break
x += line.font.direction * line.wordSpacing;
@ -36,13 +36,13 @@ class ExtractText {
} else if (util.isNum(glyph)) {
x += -glyph * line.font.size * 0.001;
if (!line.font.spaceWidthIsSet && line.isSpace(glyph)) {
partial += " ";
partial += ' ';
}
continue;
}
const spacing = (glyph.isSpace ? line.wordSpacing : 0) + line.charSpacing;
if(spacing > 0) {
console.warn(`Not implemented spacing : ${spacing} !`)
if (spacing > 0) {
console.warn(`Not implemented spacing : ${spacing} !`);
}
// TODO use glyph font character
partial += glyph.unicode;
@ -65,7 +65,7 @@ class ExtractText {
line.y = page.y;
line.setText(partial);
const isNew = lineList.y !== 0 && Math.abs(line.y - lineList.y) > line.font.size;
if(isNew) {
if (isNew) {
lineList.printText();
lineList = page.currentObject.newLine();
}
@ -81,9 +81,9 @@ class ExtractText {
* @returns {*} font from dependencies if found otherwise null
* (probably need to warn user for missing font inside document)
*/
getFontFamily(name, dependencies) {
for(let i = 0;i<dependencies.length;i++) {
if(dependencies[i].loadedName == name) {
getFontFamily (name, dependencies) {
for (let i = 0; i < dependencies.length; i++) {
if (dependencies[i].loadedName === name) {
return dependencies[i];
}
}
@ -95,27 +95,26 @@ class ExtractText {
* @param details - arguments from pdf.OPS.setFont
* @param {PdfPage} page - pdf page
*/
setFont(details, page) {
setFont (details, page) {
const fontObj = page.data.commonObjs.get(details[0]);
const font = new Model.FontObject();
// calculate space width
let spaceKey = -1
for(let key in fontObj.toUnicode._map) {
if(fontObj.toUnicode._map[key] === " ") {
let spaceKey = -1;
for (const key in fontObj.toUnicode._map) {
if (fontObj.toUnicode._map[key] === ' ') {
spaceKey = key;
break;
}
}
if(spaceKey > -1 && fontObj.widths[spaceKey]) {
if (spaceKey > -1 && fontObj.widths[spaceKey]) {
font.spaceWidthIsSet = true;
font.spaceWidth = fontObj.widths[spaceKey];
}
font.setSize(details[1]);
font.weight = fontObj.black ? (fontObj.bold ? 'bolder' : 'bold') :
(fontObj.bold ? 'bold' : 'normal');
font.weight = fontObj.black ? (fontObj.bold ? 'bolder' : 'bold') : (fontObj.bold ? 'bold' : 'normal');
font.style = fontObj.italic ? 'italic' : 'normal';
const family = this.getFontFamily(fontObj.loadedName, page.dependencies);
if(family) {
if (family) {
font.family = family.name;
} else {
font.family = fontObj.loadedName;
@ -126,5 +125,5 @@ class ExtractText {
}
module.exports = {
ExtractText
}
ExtractText,
};

@ -10,28 +10,27 @@ const saveFileAsync = async (fpath, data) => {
const stream = fs.createWriteStream(fpath);
await stream.write(data);
await stream.end();
}
};
/**
* Make directory if not exists in given path
* @param path - directory path
*/
const mkdirNotExists = (path) => {
if(!fs.existsSync(path)) {
if (!fs.existsSync(path)) {
fs.mkdirSync(path);
}
}
};
/**
* Reads directory
* @param {string} path - directory path
* @returns {string[]} - directory listing
*/
const readdirSync = (path) => {
return fs.readdirSync(path)
}
const readdirSync = (path) => fs.readdirSync(path);
module.exports = {
saveFileAsync,
mkdirNotExists,
readdirSync,
}
};

@ -4,12 +4,11 @@ const f = require('./formatters');
* Formats PDF to desired output
*/
class Formatter {
/**
* Constructor
* @param config
*/
constructor(config) {
constructor (config) {
this.debug = config.debug;
/**
* @type {{json: FormatterJSON, xml: FormatterXML, text: FormatterText}}
@ -18,8 +17,8 @@ class Formatter {
json: new f.FormatterJSON(),
xml: new f.FormatterXML(),
text: new f.FormatterText(),
}
this.data = "";
};
this.data = '';
}
/**
@ -28,7 +27,7 @@ class Formatter {
* @param doc - pdf document
* @param metadata - pdf document metadata
*/
start(format, doc, metadata) {
start (format, doc, metadata) {
const o = this.formatters[format].start(doc, metadata);
this.data += o;
if (this.debug) console.log(o);
@ -41,7 +40,7 @@ class Formatter {
* @param data - array of {PdfObject} to format
* @param last - is this page last page (useful for json formatting)
*/
format(format, page, data, last) {
format (format, page, data, last) {
const o = this.formatters[format].format(page, data, last);
this.data += o;
if (this.debug) console.log(o);
@ -51,11 +50,11 @@ class Formatter {
* End of formatting
* @param format - provided by command line parameter ex.text
*/
end(format) {
end (format) {
const o = this.formatters[format].end();
this.data += o;
if (this.debug) console.log(o);
}
}
module.exports = Formatter;
module.exports = Formatter;

@ -13,7 +13,6 @@ const FN_IMAGE = ['paintJpegXObject', 'paintImageXObject', 'paintInlineImageXObj
* Visits pdf.OPS.* methods using pdf page data
*/
class Visitor {
/**
* Constructor
* @param {object} config - application configuration
@ -46,10 +45,10 @@ class Visitor {
* @param fname - function name to be visited
* @param args - function arguments
*/
visit(fname, args) {
if(FN_TEXT.indexOf(fname) > -1) {
visit (fname, args) {
if (FN_TEXT.indexOf(fname) > -1) {
this.txt[fname](args);
} else if(FN_XOBJECT.indexOf(fname) > -1) {
} else if (FN_XOBJECT.indexOf(fname) > -1) {
this.xobject[fname](args);
} else if (FN_IMAGE.indexOf(fname) > -1) {
this.image[fname](args);
@ -59,4 +58,4 @@ class Visitor {
}
}
module.exports = Visitor
module.exports = Visitor;

@ -8,13 +8,13 @@ class FormatterJSON {
* @param metadata
* @returns {string}
*/
start(doc, metadata) {
const meta = JSON.stringify(metadata)
start (doc, metadata) {
const meta = JSON.stringify(metadata);
return `{
"pages_count": ${doc.numPages},
"metadata": ${meta},
"pages": {
`
`;
}
/**
@ -22,7 +22,7 @@ class FormatterJSON {
* @param {TextObject} textObject
* @returns {object}
*/
formatTextObject(textObject) {
formatTextObject (textObject) {
const txtObjOut = {
lines: [],
x: textObject.x,
@ -41,13 +41,13 @@ class FormatterJSON {
* @param {TextLine} textLine
* @returns {object}
*/
formatTextLine(textLine) {
formatTextLine (textLine) {
const txtLineOut = {
text: [],
x: textLine.x,
y: textLine.y,
width: textLine.width,
}
};
textLine.getData().forEach(textFont => {
const txtFontOut = this.formatTextFont(textFont);
txtLineOut.text.push(txtFontOut);
@ -60,7 +60,7 @@ class FormatterJSON {
* @param {TextFont} textFont
* @returns {object}
*/
formatTextFont(textFont) {
formatTextFont (textFont) {
return {
font: {
size: textFont.font.size,
@ -73,7 +73,7 @@ class FormatterJSON {
text: textFont.getText(),
charSpacing: textFont.charSpacing,
wordSpacing: textFont.wordSpacing,
}
};
}
/**
@ -83,27 +83,27 @@ class FormatterJSON {
* @param last
* @returns {string}
*/
format(page, data, last) {
format (page, data, last) {
const txtData = [];
data.forEach(textObject => {
const txtObjOut = this.formatTextObject(textObject);
txtData.push(txtObjOut);
});
let output = {
"data": txtData,
}
const out = JSON.stringify(output)// pretty print (output, null, 4)
return `"${page.pageIndex}": ${out}${last ? '': ','}`
const output = {
data: txtData,
};
const out = JSON.stringify(output); // pretty print (output, null, 4)
return `"${page.pageIndex}": ${out}${last ? '' : ','}`;
}
/**
* See {@link Formatter}
* @returns {string}
*/
end() {
end () {
return `}
}
`
`;
}
}

@ -8,8 +8,8 @@ class FormatterText {
* @param metadata
* @returns {string}
*/
start(doc, metadata) {
return ''
start (doc, metadata) {
return '';
}
/**
@ -17,7 +17,7 @@ class FormatterText {
* @param {TextObject} textObject
* @returns {string}
*/
formatTextObject(textObject) {
formatTextObject (textObject) {
let txtObjOut = '';
textObject.getData().forEach(textLine => {
const txtLineOut = this.formatTextLine(textLine);
@ -31,7 +31,7 @@ class FormatterText {
* @param {TextLine} textLine
* @returns {string}
*/
formatTextLine(textLine) {
formatTextLine (textLine) {
let txtLineOut = '';
textLine.getData().forEach(textFont => {
txtLineOut += this.formatTextFont(textFont);
@ -44,7 +44,7 @@ class FormatterText {
* @param {TextFont} textFont
* @returns {string|TextFont|*}
*/
formatTextFont(textFont) {
formatTextFont (textFont) {
return textFont.getText();
}
@ -55,7 +55,7 @@ class FormatterText {
* @param last
* @returns {string}
*/
format(page, data, last) {
format (page, data, last) {
let output = '';
data.forEach(textObject => {
const txtObjOut = this.formatTextObject(textObject);
@ -68,10 +68,9 @@ class FormatterText {
* See {@link Formatter}
* @returns {string}
*/
end() {
return ''
end () {
return '';
}
}
module.exports = FormatterText;

@ -8,10 +8,10 @@ class FormatterXML {
* @param metadata
* @returns {string}
*/
start(doc, metadata) {
start (doc, metadata) {
return `<?xml version="1.0" encoding="UTF-8"?>
<document>
`
`;
}
/**
@ -21,17 +21,17 @@ class FormatterXML {
* @param last
* @returns {string}
*/
format(page, data, last) {
format (page, data, last) {
const output = '';
return output
return output;
}
/**
* See {@link Formatter}
* @returns {string}
*/
end() {
return '</document>'
end () {
return '</document>';
}
}

@ -5,5 +5,5 @@ const FormatterXML = require('./FormatterXML');
module.exports = {
FormatterXML,
FormatterJSON,
FormatterText
}
FormatterText,
};

@ -5,7 +5,7 @@ class FontObject {
/**
* Constructor
*/
constructor() {
constructor () {
/**
* @type {number}
*/
@ -48,7 +48,7 @@ class FontObject {
* Set font size from a number; a negative value sets the font direction to -1
* @param {number} size
*/
setSize(size) {
setSize (size) {
if (size < 0) {
this.size = size;
this.direction = -1;
@ -57,4 +57,4 @@ class FontObject {
}
}
}
module.exports = FontObject
module.exports = FontObject;

@ -19,4 +19,4 @@ class PdfObject {
}
}
module.exports = PdfObject
module.exports = PdfObject;

@ -25,17 +25,17 @@ class PdfPage extends PdfObject {
this.wordSpacing = 0;
this.textHScale = 1;
this.textRise = 0;
this.currentObject;
this.currentFont;
this.currentObject = null;
this.currentFont = null;
}
/**
* Set current object and add it to objectList
* @param {TextObject} obj
*/
setCurrentObject(obj) {
this.currentObject = obj
setCurrentObject (obj) {
this.currentObject = obj;
this.objectList.push(obj);
}
}

@ -8,7 +8,10 @@ const TextLine = require('./text/TextLine');
* with multiple lines in pdf document
*/
class TextObject extends PdfObject {
constructor() {
/**
* Constructor
*/
constructor () {
super();
this.textMatrix = Constraints.IDENTITY_MATRIX;
this.textRenderingMode = util.TextRenderingMode.FILL;
@ -21,7 +24,7 @@ class TextObject extends PdfObject {
* Create new {@link TextLine} adds it to array and returns it as value
* @returns {TextLine}
*/
newLine() {
newLine () {
const t = new TextLine();
this._textLines.push(t);
return t;
@ -31,15 +34,15 @@ class TextObject extends PdfObject {
* Return last {@link TextLine} from text array
* @returns {TextLine}
*/
getLine() {
return this._textLines[this._textLines.length -1]
getLine () {
return this._textLines[this._textLines.length - 1];
}
/**
* Return array of {@link TextLine}
* @returns {Array}
*/
getData() {
getData () {
return this._textLines;
}
}

@ -12,4 +12,4 @@ module.exports = {
TextFont,
TextLine,
PdfPage,
}
};

@ -5,10 +5,13 @@ const PdfObject = require('./../PdfObject');
* @extends {PdfObject}
*/
class TextFont extends PdfObject {
constructor() {
/**
* Constructor
*/
constructor () {
super();
this.font = null;
this._text = "";
this._text = '';
this.charSpacing = 0;
this.wordSpacing = 0;
this._tolerance = 5;
@ -18,8 +21,8 @@ class TextFont extends PdfObject {
* Setter for text string
* @param text {string}
*/
setText(text) {
if(this._text.length > 0) {
setText (text) {
if (this._text.length > 0) {
this._text += text;
} else {
this._text = text;
@ -31,12 +34,12 @@ class TextFont extends PdfObject {
* @param {number} glyph - numeric size
* @returns {boolean} - flag true if it's space false if it's not
*/
isSpace(glyph) {
if(-glyph >= this.font.spaceWidth) {
isSpace (glyph) {
if (-glyph >= this.font.spaceWidth) {
return true;
} else if(this.font.size < 10) {
} else if (this.font.size < 10) {
const space = this.font.spaceWidth - (10 * Math.round(this.font.size)) - this._tolerance;
if(-glyph >= space) {
if (-glyph >= space) {
return true;
}
}
@ -47,9 +50,9 @@ class TextFont extends PdfObject {
* Getter for text string
* @returns {string}
*/
getText() {
getText () {
return this._text;
}
}
module.exports = TextFont;
module.exports = TextFont;

@ -1,21 +1,24 @@
const PdfObject = require('./../PdfObject');
const TextFont = require('./../text/TextFont');
/**
* Represents text line in pdf file
* @extends {PdfObject}
*/
class TextLine extends PdfObject {
constructor() {
/**
* Constructor
*/
constructor () {
super();
this._textFonts = [];
this.width = 0;
}
/**
* Adds line with font to text
* @param {TextFont} line
*/
addTextFont(line) {
addTextFont (line) {
this._textFonts.push(line);
}
@ -23,18 +26,19 @@ class TextLine extends PdfObject {
* Get last {@link TextFont}
* @returns {TextFont}
*/
getLastFontText() {
if(this._textFonts.length > 0) {
return this._textFonts[this._textFonts.length - 1]
getLastFontText () {
if (this._textFonts.length > 0) {
return this._textFonts[this._textFonts.length - 1];
}
return null
return null;
}
/**
* Output line text to the console
*/
printText() {
let txt = "";
this._textFonts.forEach((el) => txt += el.getText());
printText () {
let txt = '';
this._textFonts.forEach((el) => { txt += el.getText(); });
console.log(txt);
}
@ -42,7 +46,7 @@ class TextLine extends PdfObject {
* Get Array of all {@link TextFont} data inside this text line
* @returns {Array} of {@link TextFont}
*/
getData() {
getData () {
// this._textFonts.sort((a, b) => a.x >= b.x);
this.x = this._textFonts[0].x;
this.width = this._textFonts[this._textFonts.length - 1].x - this.x;

@ -7,10 +7,10 @@ class VisitorBase {
* @param config - configuration
* @param {PdfPage} page
*/
constructor(config, page) {
constructor (config, page) {
this.config = config;
this.page = page;
}
}
module.exports = VisitorBase;
module.exports = VisitorBase;

@ -7,7 +7,11 @@ const FileManager = require('../FileManager');
* @extends {VisitorBase}
*/
class VisitorImage extends VisitorBase {
/**
* Constructor
* @param config
* @param {PdfPage} page
*/
constructor (config, page) {
super(config, page);
FileManager.mkdirNotExists(`${this.config.outputDir}/img`);
@ -16,18 +20,19 @@ class VisitorImage extends VisitorBase {
/**
* pdf.OPS.paintJpegXObject
*/
paintJpegXObject(args) {
paintJpegXObject (args) {
if (this.config.debug) console.log('paintJpegXObject');
// if (this.config.skip) return;
const objId = args[1], w = args[1], h = args[2];
console.log(objId);
const objId = args[1];
const w = args[1];
const h = args[2];
console.log(objId, w, h);
}
/**
* pdf.OPS.paintImageXObject
*/
paintImageXObject(args) {
paintImageXObject (args) {
if (this.config.debug) console.log('paintImageXObject');
// if (this.config.skip) return;
const imgData = this.page.data.objs.get(args[0]);
@ -37,7 +42,7 @@ class VisitorImage extends VisitorBase {
/**
* pdf.OPS.paintImageMaskXObject
*/
paintImageMaskXObject(args) {
paintImageMaskXObject (args) {
if (this.config.debug) console.log('paintImageMaskXObject');
// if (this.config.skip) return;
this.paintInlineImageXObject(args[0]);
@ -46,7 +51,7 @@ class VisitorImage extends VisitorBase {
/**
* pdf.OPS.paintInlineImageXObject
*/
async paintInlineImageXObject(args) {
async paintInlineImageXObject (args) {
if (this.config.debug) console.log('paintInlineImageXObject');
// if (this.config.skip) return;
const imgData = args[0];
@ -54,7 +59,7 @@ class VisitorImage extends VisitorBase {
// TODO implement mask
const mask = false;
const imgBinary = pdfjs.convertImgDataToPng(imgData, this.forceDataSchema, !!mask);
const fpath = `${this.config.outputDir}/img/page.${this.page.data.pageIndex}.${args[1]}.png`
const fpath = `${this.config.outputDir}/img/page.${this.page.data.pageIndex}.${args[1]}.png`;
await FileManager.saveFileAsync(fpath, imgBinary);
}
}

@ -7,7 +7,7 @@ const VisitorBase = require('./VisitorBase');
* @extends {VisitorBase}
*/
class VisitorText extends VisitorBase {
constructor(config, page) {
constructor (config, page) {
super(config, page);
this.txt = new Extract.ExtractText();
}
@ -15,21 +15,20 @@ class VisitorText extends VisitorBase {
/**
* pdf.OPS.beginText
*/
beginText(args) {
beginText (args) {
if (this.config.debug) console.log('beginText');
if (this.config.skip) return;
if(!this.page.currentObject || !("textMatrixScale" in this.page.currentObject)) {
if (!this.page.currentObject || !('textMatrixScale' in this.page.currentObject)) {
this.page.setCurrentObject(new Model.TextObject());
// SHOULD determine whether a new line starts while extracting text, because one can begin at any time
this.page.currentObject.newLine();
}
}
/**
* pdf.OPS.setLeading
*/
setLeading(args) {
setLeading (args) {
if (this.config.debug) console.log('setLeading');
if (this.config.skip) return;
this.page.leading = -args[0];
@ -38,10 +37,11 @@ class VisitorText extends VisitorBase {
/**
* pdf.OPS.setLeadingMoveText
*/
setLeadingMoveText(args) {
if(debug) console.log('setLeadingMoveText');
setLeadingMoveText (args) {
if (this.config.debug) console.log('setLeadingMoveText');
if (this.config.skip) return;
const x = args[0], y = args[1];
const x = args[0];
const y = args[1];
this.page.leading = -y;
this.moveText(x, y);
}
@ -49,26 +49,26 @@ class VisitorText extends VisitorBase {
/**
* pdf.OPS.setFont
*/
setFont(args) {
setFont (args) {
if (this.config.debug) console.log('setFont');
if (this.config.skip) return;
this.txt.setFont(args, this.page)
this.txt.setFont(args, this.page);
}
/**
* pdf.OPS.showText
*/
showText(args) {
if (this.config.debug) console.log("showText");
showText (args) {
if (this.config.debug) console.log('showText');
if (this.config.skip) return;
this.txt.showText(args[0], this.page)
this.txt.showText(args[0], this.page);
}
/**
* pdf.OPS.showSpacedText
*/
showSpacedText(args) {
if (this.config.debug) console.log("showSpacedText");
showSpacedText (args) {
if (this.config.debug) console.log('showSpacedText');
if (this.config.skip) return;
this.txt.setText(args[0], this.page);
}
@ -76,57 +76,62 @@ class VisitorText extends VisitorBase {
/**
* pdf.OPS.moveText
*/
moveText(args) {
moveText (args) {
if (this.config.debug) console.log('moveText');
if (this.config.skip) return;
if(this.page.x === 0 && this.page.y === 0) {
if (this.page.x === 0 && this.page.y === 0) {
this.page.x = args[0];
this.page.y = args[1];
} else {
this.page.x = args[0]
this.page.y += args[1]
this.page.x = args[0];
this.page.y += args[1];
}
}
/**
* pdf.OPS.endText
*/
endText(args) {
if (this.debug) console.log('endText');
if (this.config.skip) return;
endText (args) {
if (this.config.debug) console.log('endText');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setCharSpacing
*/
setCharSpacing(args) {
if (this.debug) console.log('setCharSpacing');
if (this.config.skip) return;
setCharSpacing (args) {
if (this.config.debug) console.log('setCharSpacing');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setWordSpacing
*/
setWordSpacing(args) {
setWordSpacing (args) {
if (this.debug) console.log('setWordSpacing');
if (this.config.skip) return;
// if (this.config.skip) return;
}
/**
* pdf.OPS.setHScale
*/
setHScale(args) {
if (this.debug) console.log('setHScale');
if (this.config.skip) return;
setHScale (args) {
if (this.config.debug) console.log('setHScale');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setTextMatrix
*/
setTextMatrix(args) {
if (this.debug) console.log('setWordSpacing');
setTextMatrix (args) {
if (this.config.debug) console.log('setWordSpacing');
if (this.config.skip) return;
const a = args[0], b = args[1], c = args[2], d = args[3], e = args[4], f = args[5];
const a = args[0];
const b = args[1];
const c = args[2];
const d = args[3];
const e = args[4];
const f = args[5];
this.page.currentObject.textMatrix = this.page.currentObject.lineMatrix = [a, b, c, d, e, f];
this.page.x = e;
this.page.y = f;
@ -135,26 +140,26 @@ class VisitorText extends VisitorBase {
/**
* pdf.OPS.setTextRise
*/
setTextRise(args) {
if (this.debug) console.log('setTextRise');
if (this.config.skip) return;
setTextRise (args) {
if (this.config.debug) console.log('setTextRise');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setTextRenderingMode
*/
setTextRenderingMode(args) {
if (this.debug) console.log('setTextRenderingMode');
if (this.config.skip) return;
setTextRenderingMode (args) {
if (this.config.debug) console.log('setTextRenderingMode');
// if (this.config.skip) return;
}
/**
* pdf.OPS.nextLine
*/
nextLine(args) {
if (this.debug) console.log('nextLine');
if (this.config.skip) return;
nextLine (args) {
if (this.config.debug) console.log('nextLine');
// if (this.config.skip) return;
}
}
// The module's only export is the text visitor class.
module.exports = VisitorText;

@ -6,13 +6,12 @@ const VisitorBase = require('./VisitorBase');
* @extends {VisitorBase}
*/
class VisitorXObject extends VisitorBase {
  /**
   * pdf.OPS.paintFormXObjectBegin
   *
   * Unless `config.paintFormXObject` is truthy, switches `config.skip` on.
   * A nesting counter is kept on the instance so that a form XObject nested
   * inside a skipped one does not end the skipping when it closes.
   * @param {*} args - operator arguments; not read by this handler
   */
  paintFormXObjectBegin (args) {
    if (this.config.debug) console.log('paintFormXObjectBegin');
    if (!this.config.paintFormXObject) {
      // The counter is created lazily; the constructor is not defined in this class.
      this.skippedFormDepth = (this.skippedFormDepth || 0) + 1;
      this.config.skip = true;
    }
  }

  /**
   * pdf.OPS.paintFormXObjectEnd
   *
   * Leaves one nesting level and clears `config.skip` once no skipped form
   * remains open. When no skipped form was ever opened the flag is simply
   * cleared, exactly as before.
   * @param {*} args - operator arguments; not read by this handler
   */
  paintFormXObjectEnd (args) {
    if (this.config.debug) console.log('paintFormXObjectEnd');
    if (this.skippedFormDepth > 0) this.skippedFormDepth -= 1;
    if (!this.skippedFormDepth) this.config.skip = false;
  }
}
module.exports = VisitorXObject
module.exports = VisitorXObject;

@ -4,4 +4,4 @@ const VisitorImage = require('./VisitorImage');
module.exports = {
VisitorText, VisitorXObject, VisitorImage,
}
};

@ -6,7 +6,7 @@ const ImageKind = util.ImageKind;
/**
* See pdf.js/src/display/svg.js
*/
const convertImgDataToPng = (function() {
const convertImgDataToPng = (() => {
const PNG_HEADER =
new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
const CHUNK_WRAPPER_SIZE = 12;
@ -24,7 +24,7 @@ const convertImgDataToPng = (function() {
crcTable[i] = c;
}
function crc32(data, start, end) {
function crc32 (data, start, end) {
let crc = -1;
for (let i = start; i < end; i++) {
const a = (crc ^ data[i]) & 0xff;
@ -34,7 +34,7 @@ const convertImgDataToPng = (function() {
return crc ^ -1;
}
function writePngChunk(type, body, data, offset) {
function writePngChunk (type, body, data, offset) {
let p = offset;
const len = body.length;
@ -60,7 +60,7 @@ const convertImgDataToPng = (function() {
data[p + 3] = crc & 0xff;
}
function adler32(data, start, end) {
function adler32 (data, start, end) {
let a = 1;
let b = 0;
for (let i = start; i < end; ++i) {
@ -76,7 +76,7 @@ const convertImgDataToPng = (function() {
* This is the required format for compressed streams in the PNG format:
* http://www.libpng.org/pub/png/spec/1.2/PNG-Compression.html
*/
function deflateSync(literals) {
function deflateSync (literals) {
if (!isNodeJS()) {
// zlib is certainly not available outside of Node.js. We can either use
// the pako library for client-side DEFLATE compression, or use the canvas
@ -99,21 +99,22 @@ const convertImgDataToPng = (function() {
input = literals;
} else {
// eslint-disable-next-line no-undef
input = new Buffer(literals);
// input = new Buffer(literals);
input = Buffer.from(literals);
}
// const output = __non_webpack_require__('zlib')
const output = zlib
.deflateSync(input, { level: 9, });
.deflateSync(input, { level: 9 });
return output instanceof Uint8Array ? output : new Uint8Array(output);
} catch (e) {
warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
console.warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
}
return deflateSyncUncompressed(literals);
}
// An implementation of DEFLATE with compression level 0 (Z_NO_COMPRESSION).
function deflateSyncUncompressed(literals) {
function deflateSyncUncompressed (literals) {
let len = literals.length;
const maxBlockLength = 0xFFFF;
@ -154,7 +155,7 @@ const convertImgDataToPng = (function() {
return idat;
}
function encode(imgData, kind, forceDataSchema, isMask) {
function encode (imgData, kind, forceDataSchema, isMask) {
const width = imgData.width;
const height = imgData.height;
let bitDepth, colorType, lineSize;
@ -182,7 +183,8 @@ const convertImgDataToPng = (function() {
// prefix every row with predictor 0
const literals = new Uint8Array((1 + lineSize) * height);
let offsetLiterals = 0, offsetBytes = 0;
let offsetLiterals = 0;
let offsetBytes = 0;
for (let y = 0; y < height; ++y) {
literals[offsetLiterals++] = 0; // no prediction
literals.set(bytes.subarray(offsetBytes, offsetBytes + lineSize),
@ -215,7 +217,7 @@ const convertImgDataToPng = (function() {
colorType, // color type
0x00, // compression method
0x00, // filter method
0x00 // interlace method
0x00, // interlace method
]);
const idat = deflateSync(literals);
@ -232,12 +234,11 @@ const convertImgDataToPng = (function() {
offset += CHUNK_WRAPPER_SIZE + idat.length;
writePngChunk('IEND', new Uint8Array(0), data, offset);
return data;
//return util.createObjectURL(data, 'image/png', forceDataSchema);
// return util.createObjectURL(data, 'image/png', forceDataSchema);
}
return function convertImgDataToPng(imgData, forceDataSchema, isMask) {
const kind = (imgData.kind === undefined ?
ImageKind.GRAYSCALE_1BPP : imgData.kind);
return function convertImgDataToPng (imgData, forceDataSchema, isMask) {
const kind = (imgData.kind === undefined ? ImageKind.GRAYSCALE_1BPP : imgData.kind);
return encode(imgData, kind, forceDataSchema, isMask);
};
})();