Add eslint standard with small modifications

- semi always
- comma-dangle always-multiline
This commit is contained in:
Michal Szczepanski 2019-07-28 09:40:39 +02:00
parent 4fd9b6024c
commit dea317eda8
25 changed files with 311 additions and 282 deletions

11
.eslintrc.json Normal file

@ -0,0 +1,11 @@
{
"extends": "standard",
"parserOptions": {
"sourceType": "module",
"ecmaVersion": 2017
},
"rules": {
"semi": [2, "always"],
"comma-dangle": ["error", "always-multiline"]
}
}

@ -24,11 +24,18 @@
"pdf-text-extract"
],
"scripts": {
"doc": "esdoc"
"doc": "esdoc",
"lint": "eslint src"
},
"devDependencies": {
"esdoc": "^1.1.0",
"esdoc-node": "^1.0.4",
"esdoc-standard-plugin": "^1.0.0"
"esdoc-standard-plugin": "^1.0.0",
"eslint": "^6.1.0",
"eslint-config-standard": "^13.0.1",
"eslint-plugin-import": "^2.18.2",
"eslint-plugin-node": "^9.1.0",
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-standard": "^4.0.0"
}
}

@ -1,7 +1,5 @@
const fs = require('fs');
const pdf = require('pdfjs-dist');
const util = require('pdfjs-dist/lib/shared/util');
const Extract = require('./pdf/Extract');
const Visitor = require('./pdf/Visitor');
const Formatter = require('./pdf/Formatter');
const FileManager = require('./pdf/FileManager');
@ -18,7 +16,6 @@ class GoldDiggerError extends Error{
* Code based on pdf.js SVGGraphics
*/
class GoldDigger {
/**
* Constructor
* @param {object} config - configuration
@ -45,7 +42,7 @@ class GoldDigger {
const doc = await pdf.getDocument({
data: data,
}).promise;
return doc
return doc;
}
/**
@ -63,12 +60,12 @@ class GoldDigger {
// read pages
for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
const pageData = await doc.getPage(pageNum);
const viewport = pageData.getViewport({ scale: 1.0, });
const viewport = pageData.getViewport({ scale: 1.0 });
if (debug) console.log(`--- BEGIN Page ${pageNum} size: ${viewport.width}x${viewport.height}`);
const output = await this.digPage(pageData, pageNum);
const last = pageNum == doc.numPages;
const last = pageNum === doc.numPages;
this.formatter.format(format, pageData, output, last);
if(debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`)
if (debug) console.log(`--- END Page ${pageNum} objects : ${output.length}`);
}
this.formatter.end(format);
// save to file
@ -82,7 +79,6 @@ class GoldDigger {
* @param {number} pageNum - page number
*/
async digPage (pageData, pageNum) {
// const text = await page.extractTextContent();
const operatorList = await pageData.getOperatorList();
// page.commonObjs, page.objs
@ -121,8 +117,8 @@ class GoldDigger {
* (SVGGraphics)
*/
operatorMapping () {
const mapping = {}
for(var op in pdf.OPS) {
const mapping = {};
for (const op in pdf.OPS) {
mapping[pdf.OPS[op]] = op;
}
return mapping;
@ -141,9 +137,9 @@ class GoldDigger {
for (let i = 0, ii = fnArray.length; i < ii; i++) {
const fnId = fnArray[i];
opList.push({
'fnId': fnId,
'fn': operatorIdMapping[fnId],
'args': argsArray[i],
fnId,
fn: operatorIdMapping[fnId],
args: argsArray[i],
});
}
return this.opListToTree(opList);
@ -160,7 +156,7 @@ class GoldDigger {
for (const opListElement of opList) {
if (opListElement.fn === 'save') {
opTree.push({ 'fnId': 92, 'fn': 'group', 'items': [], });
opTree.push({ fnId: 92, fn: 'group', items: [] });
tmp.push(opTree);
opTree = opTree[opTree.length - 1].items;
continue;
@ -325,4 +321,4 @@ class GoldDigger {
}
}
module.exports = GoldDigger
module.exports = GoldDigger;

@ -5,4 +5,4 @@ const FONT_IDENTITY_MATRIX = [0.001, 0, 0, 0.001, 0, 0];
module.exports = {
IDENTITY_MATRIX,
FONT_IDENTITY_MATRIX,
}
};

@ -18,15 +18,15 @@ class ExtractText {
const line = new Model.TextFont();
line.font = page.currentFont;
// copy from previous line
const lastLine = lineList.getLastFontText()
const lastLine = lineList.getLastFontText();
if (lastLine) {
line.wordSpacing = lastLine.wordSpacing;
line.charSpacing = lastLine.charSpacing;
}
let startX = page.x;
let startY = page.y;
const startX = page.x;
const startY = page.y;
// END
let partial = "";
let partial = '';
let x = 0;
for (const glyph of glyphs) {
if (glyph === null) {
@ -36,13 +36,13 @@ class ExtractText {
} else if (util.isNum(glyph)) {
x += -glyph * line.font.size * 0.001;
if (!line.font.spaceWidthIsSet && line.isSpace(glyph)) {
partial += " ";
partial += ' ';
}
continue;
}
const spacing = (glyph.isSpace ? line.wordSpacing : 0) + line.charSpacing;
if (spacing > 0) {
console.warn(`Not implemented spacing : ${spacing} !`)
console.warn(`Not implemented spacing : ${spacing} !`);
}
// TODO use glyph font character
partial += glyph.unicode;
@ -83,7 +83,7 @@ class ExtractText {
*/
getFontFamily (name, dependencies) {
for (let i = 0; i < dependencies.length; i++) {
if(dependencies[i].loadedName == name) {
if (dependencies[i].loadedName === name) {
return dependencies[i];
}
}
@ -99,9 +99,9 @@ class ExtractText {
const fontObj = page.data.commonObjs.get(details[0]);
const font = new Model.FontObject();
// calculate space width
let spaceKey = -1
for(let key in fontObj.toUnicode._map) {
if(fontObj.toUnicode._map[key] === " ") {
let spaceKey = -1;
for (const key in fontObj.toUnicode._map) {
if (fontObj.toUnicode._map[key] === ' ') {
spaceKey = key;
break;
}
@ -111,8 +111,7 @@ class ExtractText {
font.spaceWidth = fontObj.widths[spaceKey];
}
font.setSize(details[1]);
font.weight = fontObj.black ? (fontObj.bold ? 'bolder' : 'bold') :
(fontObj.bold ? 'bold' : 'normal');
font.weight = fontObj.black ? (fontObj.bold ? 'bolder' : 'bold') : (fontObj.bold ? 'bold' : 'normal');
font.style = fontObj.italic ? 'italic' : 'normal';
const family = this.getFontFamily(fontObj.loadedName, page.dependencies);
if (family) {
@ -126,5 +125,5 @@ class ExtractText {
}
module.exports = {
ExtractText
}
ExtractText,
};

@ -10,7 +10,7 @@ const saveFileAsync = async (fpath, data) => {
const stream = fs.createWriteStream(fpath);
await stream.write(data);
await stream.end();
}
};
/**
* Make directory if not exists in given path
@ -20,18 +20,17 @@ const mkdirNotExists = (path) => {
if (!fs.existsSync(path)) {
fs.mkdirSync(path);
}
}
};
/**
* Reads directory
* @param {string} path - directory path
* @returns {string[]} - directory listing
*/
const readdirSync = (path) => {
return fs.readdirSync(path)
}
const readdirSync = (path) => fs.readdirSync(path);
module.exports = {
saveFileAsync,
mkdirNotExists,
readdirSync,
}
};

@ -4,7 +4,6 @@ const f = require('./formatters');
* Formats PDF to desired output
*/
class Formatter {
/**
* Constructor
* @param config
@ -18,8 +17,8 @@ class Formatter {
json: new f.FormatterJSON(),
xml: new f.FormatterXML(),
text: new f.FormatterText(),
}
this.data = "";
};
this.data = '';
}
/**

@ -13,7 +13,6 @@ const FN_IMAGE = ['paintJpegXObject', 'paintImageXObject', 'paintInlineImageXObj
* Visits pdf.OPS.* methods using pdf page data
*/
class Visitor {
/**
* Constructor
* @param {object} config - application configuration
@ -59,4 +58,4 @@ class Visitor {
}
}
module.exports = Visitor
module.exports = Visitor;

@ -9,12 +9,12 @@ class FormatterJSON {
* @returns {string}
*/
start (doc, metadata) {
const meta = JSON.stringify(metadata)
const meta = JSON.stringify(metadata);
return `{
"pages_count": ${doc.numPages},
"metadata": ${meta},
"pages": {
`
`;
}
/**
@ -47,7 +47,7 @@ class FormatterJSON {
x: textLine.x,
y: textLine.y,
width: textLine.width,
}
};
textLine.getData().forEach(textFont => {
const txtFontOut = this.formatTextFont(textFont);
txtLineOut.text.push(txtFontOut);
@ -73,7 +73,7 @@ class FormatterJSON {
text: textFont.getText(),
charSpacing: textFont.charSpacing,
wordSpacing: textFont.wordSpacing,
}
};
}
/**
@ -89,11 +89,11 @@ class FormatterJSON {
const txtObjOut = this.formatTextObject(textObject);
txtData.push(txtObjOut);
});
let output = {
"data": txtData,
}
const out = JSON.stringify(output)// pretty print (output, null, 4)
return `"${page.pageIndex}": ${out}${last ? '': ','}`
const output = {
data: txtData,
};
const out = JSON.stringify(output); // pretty print (output, null, 4)
return `"${page.pageIndex}": ${out}${last ? '' : ','}`;
}
/**
@ -103,7 +103,7 @@ class FormatterJSON {
end () {
return `}
}
`
`;
}
}

@ -9,7 +9,7 @@ class FormatterText {
* @returns {string}
*/
start (doc, metadata) {
return ''
return '';
}
/**
@ -69,9 +69,8 @@ class FormatterText {
* @returns {string}
*/
end () {
return ''
return '';
}
}
module.exports = FormatterText;

@ -11,7 +11,7 @@ class FormatterXML {
start (doc, metadata) {
return `<?xml version="1.0" encoding="UTF-8"?>
<document>
`
`;
}
/**
@ -23,7 +23,7 @@ class FormatterXML {
*/
format (page, data, last) {
const output = '';
return output
return output;
}
/**
@ -31,7 +31,7 @@ class FormatterXML {
* @returns {string}
*/
end () {
return '</document>'
return '</document>';
}
}

@ -5,5 +5,5 @@ const FormatterXML = require('./FormatterXML');
module.exports = {
FormatterXML,
FormatterJSON,
FormatterText
}
FormatterText,
};

@ -57,4 +57,4 @@ class FontObject {
}
}
}
module.exports = FontObject
module.exports = FontObject;

@ -19,4 +19,4 @@ class PdfObject {
}
}
module.exports = PdfObject
module.exports = PdfObject;

@ -26,8 +26,8 @@ class PdfPage extends PdfObject {
this.textHScale = 1;
this.textRise = 0;
this.currentObject;
this.currentFont;
this.currentObject = null;
this.currentFont = null;
}
/**
@ -35,7 +35,7 @@ class PdfPage extends PdfObject {
* @param {TextObject} obj
*/
setCurrentObject (obj) {
this.currentObject = obj
this.currentObject = obj;
this.objectList.push(obj);
}
}

@ -8,6 +8,9 @@ const TextLine = require('./text/TextLine');
* with multiple lines in pdf document
*/
class TextObject extends PdfObject {
/**
* Constructor
*/
constructor () {
super();
this.textMatrix = Constraints.IDENTITY_MATRIX;
@ -32,7 +35,7 @@ class TextObject extends PdfObject {
* @returns {TextLine}
*/
getLine () {
return this._textLines[this._textLines.length -1]
return this._textLines[this._textLines.length - 1];
}
/**

@ -12,4 +12,4 @@ module.exports = {
TextFont,
TextLine,
PdfPage,
}
};

@ -5,10 +5,13 @@ const PdfObject = require('./../PdfObject');
* @extends {PdfObject}
*/
class TextFont extends PdfObject {
/**
* Constructor
*/
constructor () {
super();
this.font = null;
this._text = "";
this._text = '';
this.charSpacing = 0;
this.wordSpacing = 0;
this._tolerance = 5;

@ -1,16 +1,19 @@
const PdfObject = require('./../PdfObject');
const TextFont = require('./../text/TextFont');
/**
* Represents text line in pdf file
* @extends {PdfObject}
*/
class TextLine extends PdfObject {
/**
* Constructor
*/
constructor () {
super();
this._textFonts = [];
this.width = 0;
}
/**
* Adds line with font to text
* @param {TextFont} line
@ -25,16 +28,17 @@ class TextLine extends PdfObject {
*/
getLastFontText () {
if (this._textFonts.length > 0) {
return this._textFonts[this._textFonts.length - 1]
return this._textFonts[this._textFonts.length - 1];
}
return null
return null;
}
/**
* Output line text to the console
*/
printText () {
let txt = "";
this._textFonts.forEach((el) => txt += el.getText());
let txt = '';
this._textFonts.forEach((el) => { txt += el.getText(); });
console.log(txt);
}

@ -7,7 +7,11 @@ const FileManager = require('../FileManager');
* @extends {VisitorBase}
*/
class VisitorImage extends VisitorBase {
/**
* Constructor
* @param config
* @param {PdfPage} page
*/
constructor (config, page) {
super(config, page);
FileManager.mkdirNotExists(`${this.config.outputDir}/img`);
@ -19,11 +23,12 @@ class VisitorImage extends VisitorBase {
paintJpegXObject (args) {
if (this.config.debug) console.log('paintJpegXObject');
// if (this.config.skip) return;
const objId = args[1], w = args[1], h = args[2];
console.log(objId);
const objId = args[1];
const w = args[1];
const h = args[2];
console.log(objId, w, h);
}
/**
* pdf.OPS.paintImageXObject
*/
@ -54,7 +59,7 @@ class VisitorImage extends VisitorBase {
// TODO implement mask
const mask = false;
const imgBinary = pdfjs.convertImgDataToPng(imgData, this.forceDataSchema, !!mask);
const fpath = `${this.config.outputDir}/img/page.${this.page.data.pageIndex}.${args[1]}.png`
const fpath = `${this.config.outputDir}/img/page.${this.page.data.pageIndex}.${args[1]}.png`;
await FileManager.saveFileAsync(fpath, imgBinary);
}
}

@ -18,12 +18,11 @@ class VisitorText extends VisitorBase {
beginText (args) {
if (this.config.debug) console.log('beginText');
if (this.config.skip) return;
if(!this.page.currentObject || !("textMatrixScale" in this.page.currentObject)) {
if (!this.page.currentObject || !('textMatrixScale' in this.page.currentObject)) {
this.page.setCurrentObject(new Model.TextObject());
// Should determine whether a new line starts while extracting text, because one can begin at any time
this.page.currentObject.newLine();
}
}
/**
@ -39,9 +38,10 @@ class VisitorText extends VisitorBase {
* pdf.OPS.setLeadingMoveText
*/
setLeadingMoveText (args) {
if(debug) console.log('setLeadingMoveText');
if (this.config.debug) console.log('setLeadingMoveText');
if (this.config.skip) return;
const x = args[0], y = args[1];
const x = args[0];
const y = args[1];
this.page.leading = -y;
this.moveText(x, y);
}
@ -52,23 +52,23 @@ class VisitorText extends VisitorBase {
setFont (args) {
if (this.config.debug) console.log('setFont');
if (this.config.skip) return;
this.txt.setFont(args, this.page)
this.txt.setFont(args, this.page);
}
/**
* pdf.OPS.showText
*/
showText (args) {
if (this.config.debug) console.log("showText");
if (this.config.debug) console.log('showText');
if (this.config.skip) return;
this.txt.showText(args[0], this.page)
this.txt.showText(args[0], this.page);
}
/**
* pdf.OPS.showSpacedText
*/
showSpacedText (args) {
if (this.config.debug) console.log("showSpacedText");
if (this.config.debug) console.log('showSpacedText');
if (this.config.skip) return;
this.txt.setText(args[0], this.page);
}
@ -83,8 +83,8 @@ class VisitorText extends VisitorBase {
this.page.x = args[0];
this.page.y = args[1];
} else {
this.page.x = args[0]
this.page.y += args[1]
this.page.x = args[0];
this.page.y += args[1];
}
}
@ -92,16 +92,16 @@ class VisitorText extends VisitorBase {
* pdf.OPS.endText
*/
endText (args) {
if (this.debug) console.log('endText');
if (this.config.skip) return;
if (this.config.debug) console.log('endText');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setCharSpacing
*/
setCharSpacing (args) {
if (this.debug) console.log('setCharSpacing');
if (this.config.skip) return;
if (this.config.debug) console.log('setCharSpacing');
// if (this.config.skip) return;
}
/**
@ -109,24 +109,29 @@ class VisitorText extends VisitorBase {
*/
setWordSpacing (args) {
if (this.debug) console.log('setWordSpacing');
if (this.config.skip) return;
// if (this.config.skip) return;
}
/**
* pdf.OPS.setHScale
*/
setHScale (args) {
if (this.debug) console.log('setHScale');
if (this.config.skip) return;
if (this.config.debug) console.log('setHScale');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setTextMatrix
*/
setTextMatrix (args) {
if (this.debug) console.log('setWordSpacing');
if (this.config.debug) console.log('setWordSpacing');
if (this.config.skip) return;
const a = args[0], b = args[1], c = args[2], d = args[3], e = args[4], f = args[5];
const a = args[0];
const b = args[1];
const c = args[2];
const d = args[3];
const e = args[4];
const f = args[5];
this.page.currentObject.textMatrix = this.page.currentObject.lineMatrix = [a, b, c, d, e, f];
this.page.x = e;
this.page.y = f;
@ -136,25 +141,25 @@ class VisitorText extends VisitorBase {
* pdf.OPS.setTextRise
*/
setTextRise (args) {
if (this.debug) console.log('setTextRise');
if (this.config.skip) return;
if (this.config.debug) console.log('setTextRise');
// if (this.config.skip) return;
}
/**
* pdf.OPS.setTextRenderingMode
*/
setTextRenderingMode (args) {
if (this.debug) console.log('setTextRenderingMode');
if (this.config.skip) return;
if (this.config.debug) console.log('setTextRenderingMode');
// if (this.config.skip) return;
}
/**
* pdf.OPS.nextLine
*/
nextLine (args) {
if (this.debug) console.log('nextLine');
if (this.config.skip) return;
if (this.config.debug) console.log('nextLine');
// if (this.config.skip) return;
}
}
module.exports = VisitorText
module.exports = VisitorText;

@ -6,7 +6,6 @@ const VisitorBase = require('./VisitorBase');
* @extends {VisitorBase}
*/
class VisitorXObject extends VisitorBase {
/**
* pdf.OPS.paintFormXObjectBegin
*/
@ -26,4 +25,4 @@ class VisitorXObject extends VisitorBase {
}
}
module.exports = VisitorXObject
module.exports = VisitorXObject;

@ -4,4 +4,4 @@ const VisitorImage = require('./VisitorImage');
module.exports = {
VisitorText, VisitorXObject, VisitorImage,
}
};

@ -6,7 +6,7 @@ const ImageKind = util.ImageKind;
/**
* See pdf.js/src/display/svg.js
*/
const convertImgDataToPng = (function() {
const convertImgDataToPng = (() => {
const PNG_HEADER =
new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
const CHUNK_WRAPPER_SIZE = 12;
@ -99,14 +99,15 @@ const convertImgDataToPng = (function() {
input = literals;
} else {
// eslint-disable-next-line no-undef
input = new Buffer(literals);
// input = new Buffer(literals);
input = Buffer.from(literals);
}
// const output = __non_webpack_require__('zlib')
const output = zlib
.deflateSync(input, { level: 9, });
.deflateSync(input, { level: 9 });
return output instanceof Uint8Array ? output : new Uint8Array(output);
} catch (e) {
warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
console.warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
}
return deflateSyncUncompressed(literals);
@ -182,7 +183,8 @@ const convertImgDataToPng = (function() {
// prefix every row with predictor 0
const literals = new Uint8Array((1 + lineSize) * height);
let offsetLiterals = 0, offsetBytes = 0;
let offsetLiterals = 0;
let offsetBytes = 0;
for (let y = 0; y < height; ++y) {
literals[offsetLiterals++] = 0; // no prediction
literals.set(bytes.subarray(offsetBytes, offsetBytes + lineSize),
@ -215,7 +217,7 @@ const convertImgDataToPng = (function() {
colorType, // color type
0x00, // compression method
0x00, // filter method
0x00 // interlace method
0x00, // interlace method
]);
const idat = deflateSync(literals);
@ -236,8 +238,7 @@ const convertImgDataToPng = (function() {
}
return function convertImgDataToPng (imgData, forceDataSchema, isMask) {
const kind = (imgData.kind === undefined ?
ImageKind.GRAYSCALE_1BPP : imgData.kind);
const kind = (imgData.kind === undefined ? ImageKind.GRAYSCALE_1BPP : imgData.kind);
return encode(imgData, kind, forceDataSchema, isMask);
};
})();