Add image data information to output xml, json formatters
This commit is contained in:
parent
e8685c78af
commit
0519406a25
@ -1,3 +1,5 @@
|
||||
const Model = require('../model');
|
||||
|
||||
/**
|
||||
* Format PDF into json data
|
||||
*/
|
||||
@ -43,6 +45,20 @@ class FormatterJSON {
|
||||
return txtObjOut;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format image object
|
||||
* @param {ImageObject} imageObject
|
||||
*/
|
||||
formatImageObject (imageObject) {
|
||||
return {
|
||||
x: imageObject.x,
|
||||
y: imageObject.y,
|
||||
width: imageObject.width,
|
||||
height: imageObject.height,
|
||||
name: imageObject.name,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats {@link TextLine} to JSON serializable object
|
||||
* @param {TextLine} textLine
|
||||
@ -92,14 +108,20 @@ class FormatterJSON {
|
||||
*/
|
||||
format (page, data, last) {
|
||||
const txtData = [];
|
||||
data.forEach(textObject => {
|
||||
const txtObjOut = this.formatTextObject(textObject);
|
||||
txtData.push(txtObjOut);
|
||||
data.forEach(pdfObject => {
|
||||
if (pdfObject instanceof Model.TextObject) {
|
||||
const txtObjOut = this.formatTextObject(pdfObject);
|
||||
txtData.push(txtObjOut);
|
||||
} else if (pdfObject instanceof Model.ImageObject) {
|
||||
const imgObjOut = this.formatImageObject(pdfObject);
|
||||
txtData.push(imgObjOut);
|
||||
} else {
|
||||
console.warn(`Not recognised object ${pdfObject}`);
|
||||
}
|
||||
});
|
||||
const output = {
|
||||
const out = JSON.stringify({
|
||||
data: txtData,
|
||||
};
|
||||
const out = JSON.stringify(output); // pretty print (output, null, 4)
|
||||
}); // pretty print (output, null, 4)
|
||||
return `"${page.pageIndex}": ${out}${last ? '' : ','}`;
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
const Model = require('../model');
|
||||
/**
|
||||
* Format PDF into text data
|
||||
*/
|
||||
@ -64,9 +65,14 @@ class FormatterText {
|
||||
*/
|
||||
format (page, data, last) {
|
||||
let output = '';
|
||||
data.forEach(textObject => {
|
||||
const txtObjOut = this.formatTextObject(textObject);
|
||||
output += txtObjOut;
|
||||
data.forEach(pdfObject => {
|
||||
if (pdfObject instanceof Model.TextObject) {
|
||||
output += this.formatTextObject(pdfObject);
|
||||
} else if (pdfObject instanceof Model.ImageObject) {
|
||||
// TODO add config to save image position
|
||||
} else {
|
||||
console.warn(`Not recognised object ${pdfObject}`);
|
||||
}
|
||||
});
|
||||
return output;
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
const Model = require('../model');
|
||||
|
||||
/**
|
||||
* Format PDF into xml data
|
||||
*/
|
||||
@ -30,10 +32,22 @@ class FormatterXML {
|
||||
lines.forEach(textLine => {
|
||||
txtObjOut += this.formatTextLine(textLine);
|
||||
});
|
||||
txtObjOut += '</object>';
|
||||
txtObjOut += '</object>\n';
|
||||
return txtObjOut;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format image object
|
||||
* @param {ImageObject} imageObject
|
||||
*/
|
||||
formatImageObject (imageObject) {
|
||||
return `<image
|
||||
x="${imageObject.x}"
|
||||
y="${imageObject.y}"
|
||||
width="${imageObject.width}"
|
||||
height="${imageObject.height}">${imageObject.name}</image>\n`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats {@link TextLine} to xml object
|
||||
* @param {TextLine} textLine
|
||||
@ -76,8 +90,14 @@ class FormatterXML {
|
||||
*/
|
||||
format (page, data, last) {
|
||||
let out = '<data>\n';
|
||||
data.forEach(textObject => {
|
||||
out += this.formatTextObject(textObject) + '\n';
|
||||
data.forEach(pdfObject => {
|
||||
if (pdfObject instanceof Model.TextObject) {
|
||||
out += this.formatTextObject(pdfObject);
|
||||
} else if (pdfObject instanceof Model.ImageObject) {
|
||||
out += this.formatImageObject(pdfObject);
|
||||
} else {
|
||||
console.warn(`Not recognised object ${pdfObject}`);
|
||||
}
|
||||
});
|
||||
out += '</data>';
|
||||
return out;
|
||||
|
43
src/pdf/model/ImageObject.js
Normal file
43
src/pdf/model/ImageObject.js
Normal file
@ -0,0 +1,43 @@
|
||||
const PdfObject = require('./PdfObject');
|
||||
|
||||
/**
|
||||
* Information about images
|
||||
*/
|
||||
class ImageObject extends PdfObject {
    /**
     * Create an image record with an empty name and zero width/height.
     */
    constructor () {
        super();

        /** @type {string} saved image name */
        this.name = '';

        /** @type {number} document image width */
        this.width = 0;

        /** @type {number} document image height */
        this.height = 0;
    }

    /**
     * Set name, position and size of this object in a single call.
     * @param {string} name
     * @param {number} x
     * @param {number} y
     * @param {number} width
     * @param {number} height
     */
    fill (name, x, y, width, height) {
        // Object.assign writes the properties in listed order:
        // name, x, y, width, height.
        Object.assign(this, { name, x, y, width, height });
    }
}
|
||||
|
||||
module.exports = ImageObject;
|
@ -33,6 +33,14 @@ class PdfPage extends PdfObject {
|
||||
this.fonts = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Add image to object list
|
||||
* @param {ImageObject} image
|
||||
*/
|
||||
addImage (image) {
|
||||
this.objectList.push(image);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set current object and add it to objectList
|
||||
* @param {TextObject} obj
|
||||
|
@ -1,5 +1,6 @@
|
||||
const FontObject = require('./FontObject');
|
||||
const PdfObject = require('./PdfObject');
|
||||
const ImageObject = require('./ImageObject');
|
||||
const PdfPage = require('./PdfPage');
|
||||
const TextObject = require('./TextObject');
|
||||
const TextFont = require('./text/TextFont');
|
||||
@ -12,4 +13,5 @@ module.exports = {
|
||||
TextFont,
|
||||
TextLine,
|
||||
PdfPage,
|
||||
ImageObject,
|
||||
};
|
||||
|
@ -1,4 +1,5 @@
|
||||
const VisitorBase = require('./VisitorBase');
|
||||
const Model = require('../model');
|
||||
const pdfjs = require('../../pdfjs');
|
||||
const FileManager = require('../FileManager');
|
||||
|
||||
@ -59,7 +60,11 @@ class VisitorImage extends VisitorBase {
|
||||
// TODO implement mask
|
||||
const mask = false;
|
||||
const imgBinary = pdfjs.convertImgDataToPng(imgData, this.forceDataSchema, !!mask);
|
||||
const fpath = `${this.config.outputDir}/img/page.${this.page.data.pageIndex}.${args[1]}.png`;
|
||||
const fname = `page.${this.page.data.pageIndex}.${args[1]}.png`;
|
||||
const fpath = `${this.config.outputDir}/img/${fname}`;
|
||||
const image = new Model.ImageObject();
|
||||
image.fill(fname, this.page.x, this.page.y, imgData.width, imgData.height);
|
||||
this.page.addImage(image);
|
||||
await FileManager.saveFileAsync(fpath, imgBinary);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user