commit 25401982b163e5b64d5b925375458385156282e6 Author: Michal Szczepanski Date: Wed Jan 13 09:41:20 2021 +0100 Add data gathering code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bff2d76 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.iml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6cbfd06 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +Copyright 2021, Michal Szczepanski + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/ceidg.py b/ceidg.py
new file mode 100644
index 0000000..4956207
--- /dev/null
+++ b/ceidg.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Save today's CEIDG statistics counters to data/YYYY-MM-DD.txt.
+
+Fetches the CEIDG statistics page, reads four counters from the page's table
+cells plus one more value from a table that the page carries inside an HTML
+comment, and writes them as a single comma-separated line:
+registered,renewed,suspended,closed,total
+"""
+import os
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup, Comment
+
+url = 'https://prod.ceidg.gov.pl/ceidg.cms.engine/Template/Includes/StatisticPage.aspx?Id=3814CF7F-246D-4CC3-8B89-88AA1395DF1D'
+# Without a timeout requests waits indefinitely on a stalled connection.
+resp = requests.get(url, timeout=30)
+# Fail loudly on an HTTP error instead of scraping an error page.
+resp.raise_for_status()
+soup = BeautifulSoup(resp.content, 'html.parser')
+
+# Values are read from the odd-indexed td cells (presumably label/value
+# pairs -- confirm against the live page).
+tdlist = soup.find_all('td')
+if len(tdlist) < 8:
+    raise RuntimeError(f'expected at least 8 td cells, found {len(tdlist)}')
+registered = tdlist[1].text.strip()
+renewed = tdlist[3].text.strip()
+suspended = tdlist[5].text.strip()
+closed = tdlist[7].text.strip()
+
+# The last value lives in markup wrapped in the page's first HTML comment, so
+# that comment's text is parsed as a document of its own.
+comments = soup.find_all(string=lambda text: isinstance(text, Comment))
+if not comments:
+    raise RuntimeError('no HTML comment found on the statistics page')
+hidden_cells = BeautifulSoup(str(comments[0]), 'html.parser').find_all('td')
+if len(hidden_cells) < 2:
+    raise RuntimeError('commented-out table has fewer than 2 td cells')
+# Named `total` rather than `all` so the builtin all() is not shadowed.
+total = hidden_cells[1].text.strip()
+
+dt = datetime.now().strftime('%Y-%m-%d')
+# open() does not create missing directories, so make sure data/ exists.
+os.makedirs('data', exist_ok=True)
+# Write-only access is enough; no trailing newline, matching existing files.
+with open(f'data/{dt}.txt', 'w', encoding='utf-8') as f:
+    f.write(f'{registered},{renewed},{suspended},{closed},{total}')
diff --git a/data/2021-01-13.txt b/data/2021-01-13.txt
new file mode 100644
index 0000000..dacc91c
--- /dev/null
+++ b/data/2021-01-13.txt
@@ -0,0 +1 @@
+1090,310,780,630,2415617
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ffd820a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+certifi==2020.12.5
+chardet==4.0.0
+idna==2.10
+requests==2.25.1
+soupsieve==2.1
+urllib3==1.26.2