Add XML formatter for XML output (closes #1)

This commit is contained in:
Michal Szczepanski 2019-07-28 14:57:11 +02:00
parent 255de1f3c2
commit c74c037d2a
2 changed files with 60 additions and 3 deletions

2
gd.js

@ -3,7 +3,7 @@ const GoldDigger = require('./src/GoldDigger');
const FileManager = require('./src/pdf/FileManager');
// Output formats the tool accepts; this list is also interpolated into the
// invalid-format error message below.
const supportedFormat = ['text', 'json', 'xml'];
const ERR_INVALID_FORMAT = `
Invalid output
Please specify one of those values : "${supportedFormat}"

@ -14,6 +14,59 @@ class FormatterXML {
`;
}
/**
* Formats {@link TextObject} to xml object
* @param {TextObject} textObject
* @returns {object}
*/
formatTextObject (textObject) {
let txtObjOut = `<object x="${textObject.x}" y="${textObject.y}" matrix="${textObject.textMatrix}">\n`;
const lines = textObject.getData();
lines.sort((a, b) => {
if (a.y > b.y) return -1;
if (a.y < b.y) return 1;
return 0;
});
lines.forEach(textLine => {
txtObjOut += this.formatTextLine(textLine);
});
txtObjOut += '</object>';
return txtObjOut;
}
/**
* Formats {@link TextLine} to xml object
* @param {TextLine} textLine
* @returns {object}
*/
formatTextLine (textLine) {
let txtLineOut = `<line y="${textLine.y}">\n`;
textLine.getText().forEach(textFont => {
txtLineOut += this.formatTextFont(textFont);
});
txtLineOut += '</line>\n';
return txtLineOut;
}
/**
* Formats {@link TextFont} to xml object
* @param {TextFont} textFont
* @returns {object}
*/
formatTextFont (textFont) {
return `<text
size="${textFont.font.size}"
direction="${textFont.font.direction}"
family="${textFont.font.family}"
style="${textFont.font.style}"
weight="${textFont.font.weight}"
vertical="${textFont.font.vertical}"
x="${textFont.x}"
y="${textFont.y}"
char-spacing="${textFont.charSpacing}"
word-spacing="${textFont.wordSpacing}">${textFont.getText()}</text>\n`;
}
/**
* See {@link Formatter}
* @param page
@ -22,8 +75,12 @@ class FormatterXML {
* @returns {string}
*/
format (page, data, last) {
const output = '';
return output;
let out = '<data>\n';
data.forEach(textObject => {
out += this.formatTextObject(textObject) + '\n';
});
out += '</data>';
return out;
}
/**