2021-01-13 09:41:20 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
2023-02-01 15:32:31 +01:00
|
|
|
import os
|
2021-01-13 09:41:20 +01:00
|
|
|
from bs4 import BeautifulSoup, Comment
|
|
|
|
from datetime import datetime
|
|
|
|
import requests
|
2021-02-04 15:52:29 +01:00
|
|
|
import urllib3
|
2021-02-04 20:48:29 +01:00
|
|
|
|
2024-01-15 11:38:38 +01:00
|
|
|
# Statistics page that this script scrapes.
url = 'https://prod.ceidg.gov.pl/ceidg.cms.engine/template/includes/statisticpage.aspx?id=3814cf7f-246d-4cc3-8b89-88aa1395df1d'

# Bound the request so an unresponsive server cannot hang the script forever,
# and fail loudly on an HTTP error instead of parsing an error page.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

soup = BeautifulSoup(resp.content, 'html.parser')
tdlist = soup.find_all('td')

# The values are read from every second <td> (indices 1, 3, 5, 7).
# NOTE(review): presumably the even-indexed cells hold the row labels --
# confirm against the live page. A changed layout raises IndexError here.
registered = tdlist[1].text.strip()
renewed = tdlist[3].text.strip()
suspended = tdlist[5].text.strip()
closed = tdlist[7].text.strip()

# The fifth value is not in the live markup: take the first HTML comment in
# the page, parse its text as HTML and read the second <td> found in it.
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
commented_cells = BeautifulSoup(str(comments[0]), 'html.parser').find_all('td')
# Trailing underscore keeps the builtin all() usable in this module.
all_ = commented_cells[1].text.strip()

print(registered, renewed, suspended, closed, all_)

# One output file per day, grouped into one folder per year:
# data/<YYYY>/<YYYY-MM-DD>.txt
dt = datetime.now()
fname = dt.strftime('%Y-%m-%d')
dt_folder_name = dt.strftime('%Y')

# exist_ok=True avoids the race between checking for the folder and creating it.
os.makedirs(f'data/{dt_folder_name}', exist_ok=True)

# Write-only access is enough (the file is never read back); the explicit
# encoding keeps the output independent of the machine's locale.
with open(f'data/{dt_folder_name}/{fname}.txt', 'w', encoding='utf-8') as f:
    f.write(f'{registered},{renewed},{suspended},{closed},{all_}')
|