Add LICENSE and README; update package.json with a valid repository URL

2019-07-22 20:20:37 +02:00 · 2019-07-22 20:20:37 +02:00 · 195ecdad63
commit 195ecdad63
parent c8d15caca3
3 changed files with 48 additions and 1 deletions
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2019 https://vane.pl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,25 @@
+pdf-gold-digger
+====
+
+PDF information extraction library based on [PDF.js](https://mozilla.github.io/pdf.js/)
+and [Node.js](https://nodejs.org).
+
+## Work in progress
+
+### Supports:
+- extract text
+  - separate each page
+  - separate each line
+  - separate font information
+  - bounding box position
+
+### TODO:
+- specify output format and output directory
+- output to xml format
+- output to json format
+- extract images to files
+- extract font
+- extract tables
+- advanced font information
+- extract forms
+- extract drawings
--- a/package.json
+++ b/package.json
@ -1,10 +1,11 @@
 {
  "name": "pdf-gold-digger",
  "version": "0.0.1",
-  "description": "State of art pdf data extractor for nodejs.",
+  "description": "Pdf information extraction library based on pdf.js and node.js",
  "author": "Michal Szczepanski <michal@vane.pl>",
  "main": "main.js",
  "license": "MIT",
+  "repository": "szczepano/pdf-gold-digger",
  "dependencies": {
    "pdfjs-dist": "^2.1.266"
  }