diff --git a/gd.js b/gd.js index cb2c134..86042d8 100644 --- a/gd.js +++ b/gd.js @@ -3,7 +3,7 @@ const GoldDigger = require('./src/GoldDigger'); const FileManager = require('./src/pdf/FileManager'); -const supportedFormat = ['text', 'json']; +const supportedFormat = ['text', 'json', 'xml']; const ERR_INVALID_FORMAT = ` Invalid output Please specify one of those values : "${supportedFormat}" diff --git a/src/pdf/formatters/FormatterXML.js b/src/pdf/formatters/FormatterXML.js index 53c99bc..79f2983 100644 --- a/src/pdf/formatters/FormatterXML.js +++ b/src/pdf/formatters/FormatterXML.js @@ -14,6 +14,59 @@ class FormatterXML { `; } + /** + * Formats a {@link TextObject} as an XML string: sorts the lines from getData() by descending y, then appends formatTextLine() of each line + * @param {TextObject} textObject - NOTE(review): sort() reorders the array returned by getData() itself; confirm callers do not rely on its original order + * @returns {string} + */ + formatTextObject (textObject) { + let txtObjOut = `\n`; + const lines = textObject.getData(); + lines.sort((a, b) => { + if (a.y > b.y) return -1; + if (a.y < b.y) return 1; + return 0; + }); + lines.forEach(textLine => { + txtObjOut += this.formatTextLine(textLine); + }); + txtObjOut += ''; + return txtObjOut; + } + + /** + * Formats a {@link TextLine} as an XML string by appending formatTextFont() of every entry returned by getText() + * @param {TextLine} textLine + * @returns {string} + */ + formatTextLine (textLine) { + let txtLineOut = `\n`; + textLine.getText().forEach(textFont => { + txtLineOut += this.formatTextFont(textFont); + }); + txtLineOut += '\n'; + return txtLineOut; + } + + /** + * Formats a {@link TextFont} as an XML string from its getText() value followed by a newline; NOTE(review): the text is inserted without escaping, confirm whether that is intended + * @param {TextFont} textFont + * @returns {string} + */ + formatTextFont (textFont) { + return `${textFont.getText()}\n`; + } + /** * See {@link Formatter} * @param page @@ -22,8 +75,12 @@ class FormatterXML { * @returns {string} */ format (page, data, last) { - const output = ''; - return output; + let out = '\n'; + data.forEach(textObject => { + out += this.formatTextObject(textObject) + '\n'; + }); + out += ''; + return out; } /**