Move test data generation outside perf test and remove multiple os size calls
This commit is contained in:
parent
f022b7c0dc
commit
e7c0372d59
21
README.md
21
README.md
@ -20,7 +20,7 @@ For simplicity data and index files is append only.
|
|||||||
Index file is loaded and stored as dictionary in memory on start.
|
Index file is loaded and stored as dictionary in memory on start.
|
||||||
|
|
||||||
#### Dependencies
|
#### Dependencies
|
||||||
Depends on psutil to display pid statistics when running main file.
|
psutil is optional; it is used to display pid statistics when running the main file.
|
||||||
|
|
||||||
#### Run
|
#### Run
|
||||||
|
|
||||||
@ -29,8 +29,8 @@ I tested it using python3.7 but it can be easily converted to any python version
|
|||||||
By default it :
|
By default it :
|
||||||
- removes 2 files test.db, test.index if those files exist
|
- removes 2 files test.db, test.index if those files exist
|
||||||
- create 2 files test.db, test.index
|
- create 2 files test.db, test.index
|
||||||
- writes and index 100k random string with characters between (100, 1000) to test.db file
|
- writes and indexes 1 million random strings, each 100 to 999 characters long, to the test.db file (around 540MB)
|
||||||
- reads 100k random elements from file
|
- reads 1 million random elements from file (around 8MB)
|
||||||
- performs read of object at position 2
|
- performs read of object at position 2
|
||||||
- remove object from position 3
|
- remove object from position 3
|
||||||
- update object at position 2
|
- update object at position 2
|
||||||
@ -41,12 +41,13 @@ python dummy_crud_database.py
|
|||||||
|
|
||||||
#### Output
|
#### Output
|
||||||
```bash
|
```bash
|
||||||
write elements 100000 in 70.1026759147644
|
Test elements size 1000000
|
||||||
read elements 100000 in 3.7399983406066895
|
write elements in 35.61809206008911s - 28075.61950013945 per second
|
||||||
size : 100000
|
read elements in 13.677339792251587s - 73113.63285472477 per second
|
||||||
database fsize : 53.91 MB
|
size : 1000000
|
||||||
index fsize : 0.76 MB
|
database fsize : 539.06 MB
|
||||||
pid memory usage : 25.61 MB
|
index fsize : 7.63 MB
|
||||||
```
|
```
|
||||||
|
|
||||||
So it looks like it can do 1,4k inserts per second and around 26k reads per second on my computer (write time is including generation of random data).
|
|
||||||
|
So it looks like it can do around 28k inserts per second and around 73k reads per second on my computer (write time includes a random choice from an array of about 1k pre-generated elements).
|
||||||
|
@ -33,6 +33,7 @@ class BaseFile:
|
|||||||
pathlib.Path(self.path).touch()
|
pathlib.Path(self.path).touch()
|
||||||
self.access = access
|
self.access = access
|
||||||
self.dbfile = None
|
self.dbfile = None
|
||||||
|
self.size = 0
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.open()
|
self.open()
|
||||||
@ -43,16 +44,14 @@ class BaseFile:
|
|||||||
|
|
||||||
def open(self):
|
def open(self):
|
||||||
self.dbfile = open(self.path, self.access)
|
self.dbfile = open(self.path, self.access)
|
||||||
if self.fsize() == 0:
|
self.size = os.path.getsize(self.path)
|
||||||
|
if self.size == 0:
|
||||||
Logger.info('write initial data')
|
Logger.info('write initial data')
|
||||||
self.dbfile.write(self._write_int(0))
|
self.dbfile.write(self._write_int(0))
|
||||||
self.dbfile.seek(0)
|
self.dbfile.seek(0)
|
||||||
|
self.size += INT_SIZE
|
||||||
self.dbfile.flush()
|
self.dbfile.flush()
|
||||||
|
|
||||||
def fsize(self):
|
|
||||||
s = os.path.getsize(self.path)
|
|
||||||
return s
|
|
||||||
|
|
||||||
def _write_int(self, i):
|
def _write_int(self, i):
|
||||||
return struct.pack('<I', i)
|
return struct.pack('<I', i)
|
||||||
|
|
||||||
@ -70,22 +69,22 @@ class IndexFile(BaseFile):
|
|||||||
|
|
||||||
def write(self, i, position):
|
def write(self, i, position):
|
||||||
self.idx[i] = position
|
self.idx[i] = position
|
||||||
self.dbfile.seek(self.fsize())
|
self.dbfile.seek(self.size)
|
||||||
self.dbfile.write(self._write_int(i))
|
self.dbfile.write(self._write_int(i))
|
||||||
self.dbfile.write(self._write_int(position))
|
self.dbfile.write(self._write_int(position))
|
||||||
|
self.size += INT_SIZE*2
|
||||||
self.dbfile.flush()
|
self.dbfile.flush()
|
||||||
|
|
||||||
def read_index(self):
|
def read_index(self):
|
||||||
self.BaseFile = {}
|
self.BaseFile = {}
|
||||||
index = INT_SIZE
|
index = INT_SIZE
|
||||||
end = self.fsize()
|
end = self.size
|
||||||
while index < end:
|
while index < end:
|
||||||
i = self._read_int(self.dbfile.read(INT_SIZE))
|
i = self._read_int(self.dbfile.read(INT_SIZE))
|
||||||
position = self._read_int(self.dbfile.read(INT_SIZE))
|
position = self._read_int(self.dbfile.read(INT_SIZE))
|
||||||
index += INT_SIZE*2
|
index += INT_SIZE*2
|
||||||
self.idx[i] = position
|
self.idx[i] = position
|
||||||
|
|
||||||
|
|
||||||
class CrudIndexFile():
|
class CrudIndexFile():
|
||||||
|
|
||||||
def __init__(self, dbpath='test.db', indexpath='test.index'):
|
def __init__(self, dbpath='test.db', indexpath='test.index'):
|
||||||
@ -104,7 +103,7 @@ class CrudIndexFile():
|
|||||||
|
|
||||||
def write(self, data):
|
def write(self, data):
|
||||||
data, size = self._get_data(data)
|
data, size = self._get_data(data)
|
||||||
end = self.base.fsize()
|
end = self.base.size
|
||||||
# calculate new number of elements
|
# calculate new number of elements
|
||||||
index = self._read_size()+1
|
index = self._read_size()+1
|
||||||
# go to end
|
# go to end
|
||||||
@ -115,11 +114,12 @@ class CrudIndexFile():
|
|||||||
# increase number of elements
|
# increase number of elements
|
||||||
self._write_size(size=index)
|
self._write_size(size=index)
|
||||||
self.idxdata.write(i=index, position=end)
|
self.idxdata.write(i=index, position=end)
|
||||||
|
self.base.size += HEADER_SIZE + size
|
||||||
self.base.dbfile.flush()
|
self.base.dbfile.flush()
|
||||||
|
|
||||||
def readall(self):
|
def readall(self):
|
||||||
position = INT_SIZE
|
position = INT_SIZE
|
||||||
end = self.fsize()
|
end = self.base.size
|
||||||
output = []
|
output = []
|
||||||
while position < end:
|
while position < end:
|
||||||
self.base.dbfile.seek(position)
|
self.base.dbfile.seek(position)
|
||||||
@ -142,7 +142,7 @@ class CrudIndexFile():
|
|||||||
position = self.base.dbfile.tell()
|
position = self.base.dbfile.tell()
|
||||||
# go to header and override with status updated and set skip to end of file
|
# go to header and override with status updated and set skip to end of file
|
||||||
self.base.dbfile.seek(position-HEADER_SIZE)
|
self.base.dbfile.seek(position-HEADER_SIZE)
|
||||||
end = self.base.fsize()
|
end = self.base.size
|
||||||
self._write_header(size=size, index=idx, status=STATUS_UPDATED, skip=end)
|
self._write_header(size=size, index=idx, status=STATUS_UPDATED, skip=end)
|
||||||
# read old value
|
# read old value
|
||||||
old = self.base.dbfile.read(size).decode('utf-8')
|
old = self.base.dbfile.read(size).decode('utf-8')
|
||||||
@ -152,6 +152,7 @@ class CrudIndexFile():
|
|||||||
data, size = self._get_data(data)
|
data, size = self._get_data(data)
|
||||||
self._write_header(size=size, index=idx, status=STATUS_OK, skip=0)
|
self._write_header(size=size, index=idx, status=STATUS_OK, skip=0)
|
||||||
self.base.dbfile.write(data)
|
self.base.dbfile.write(data)
|
||||||
|
self.base.size += HEADER_SIZE + size
|
||||||
self.base.dbfile.flush()
|
self.base.dbfile.flush()
|
||||||
return old
|
return old
|
||||||
|
|
||||||
@ -175,7 +176,7 @@ class CrudIndexFile():
|
|||||||
|
|
||||||
def seek_data(self, index):
|
def seek_data(self, index):
|
||||||
position = self.idxdata.idx.get(index)
|
position = self.idxdata.idx.get(index)
|
||||||
end = self.base.fsize()
|
end = self.base.size
|
||||||
while position < end:
|
while position < end:
|
||||||
self.base.dbfile.seek(position)
|
self.base.dbfile.seek(position)
|
||||||
status, idx, skip, size = self._read_header()
|
status, idx, skip, size = self._read_header()
|
||||||
@ -223,24 +224,34 @@ if __name__ == '__main__':
|
|||||||
if os.path.exists(idxpath):
|
if os.path.exists(idxpath):
|
||||||
os.remove(idxpath)
|
os.remove(idxpath)
|
||||||
rstring = lambda size: ''.join(random.choice(string.ascii_letters) for i in range(size))
|
rstring = lambda size: ''.join(random.choice(string.ascii_letters) for i in range(size))
|
||||||
a = time.time()
|
test_size = 1000000
|
||||||
write_elements = 100000
|
Logger.info('Test elements size {}'.format(test_size))
|
||||||
read_elements = 100000
|
|
||||||
with CrudIndexFile() as crud:
|
with CrudIndexFile() as crud:
|
||||||
for i in range(write_elements+1):
|
test_data = []
|
||||||
crud.write(rstring(random.randrange(100, 1000)))
|
for i in range(0, 1001):
|
||||||
|
test_data.append(rstring(random.randrange(100, 1000)))
|
||||||
|
test_data_len = len(test_data)
|
||||||
|
a = time.time()
|
||||||
|
for i in range(test_size+1):
|
||||||
|
crud.write(random.choice(test_data))
|
||||||
|
if i % 10000 == 0:
|
||||||
|
Logger.info('write {}'.format(i))
|
||||||
size = crud.size()
|
size = crud.size()
|
||||||
print("write elements {} in {}".format(write_elements, time.time() - a))
|
t = time.time() - a
|
||||||
|
print("write elements in {}s - {} per second".format(t, test_size/t))
|
||||||
b = time.time()
|
b = time.time()
|
||||||
for i in range(0, read_elements+1):
|
for i in range(0, test_size+1):
|
||||||
crud.read(random.randrange(1, size))
|
crud.read(random.randrange(1, size))
|
||||||
print("read elements {} in {}".format(read_elements, time.time() - b))
|
if i % 10000 == 0:
|
||||||
Logger.info('read index 2 : ', crud.read(index=2))
|
Logger.info('read {}'.format(i))
|
||||||
Logger.info('remove index 3 : ', crud.delete(index=3))
|
t = time.time() - b
|
||||||
Logger.info('update index 2 : ', crud.update(index=2, data=rstring(85)))
|
print("read elements in {}s - {} per second".format(t, test_size/t))
|
||||||
Logger.info('read index {} : '.format(size), crud.read(index=size))
|
crud.read(index=2)
|
||||||
|
crud.delete(index=3)
|
||||||
|
crud.update(index=2, data=rstring(85))
|
||||||
|
crud.read(index=size)
|
||||||
Logger.info('size : ', crud.size())
|
Logger.info('size : ', crud.size())
|
||||||
Logger.info('database fsize : ', process_size.convert_size(crud.base.fsize(), 2))
|
Logger.info('database fsize : ', process_size.convert_size(crud.base.size, 2))
|
||||||
Logger.info('index fsize : ', process_size.convert_size(crud.idxdata.fsize(), 2))
|
Logger.info('index fsize : ', process_size.convert_size(crud.idxdata.size, 2))
|
||||||
Logger.info('pid : ', process_size.get_size())
|
Logger.info('pid : ', process_size.get_size())
|
||||||
Logger.info('total : {}'.format(time.time() - a))
|
Logger.info('total : {}'.format(time.time() - a))
|
||||||
|
@ -2,7 +2,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
|
try:
|
||||||
import psutil
|
import psutil
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
def convert_size(size_bytes, index=0):
|
def convert_size(size_bytes, index=0):
|
||||||
if size_bytes == 0:
|
if size_bytes == 0:
|
||||||
@ -15,5 +18,8 @@ def convert_size(size_bytes, index=0):
|
|||||||
return "%s %s" % (s, size_name[index])
|
return "%s %s" % (s, size_name[index])
|
||||||
|
|
||||||
def get_size():
|
def get_size():
|
||||||
|
try:
|
||||||
process = psutil.Process(os.getpid())
|
process = psutil.Process(os.getpid())
|
||||||
return convert_size(process.memory_info().rss, 2)
|
return convert_size(process.memory_info().rss, 2)
|
||||||
|
except:
|
||||||
|
return 0
|
||||||
|
Loading…
Reference in New Issue
Block a user