mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-09 13:57:54 -05:00
Merge pull request #50 from medvedev1088/feature/get-block-range-for-date
Feature/get block range for date
This commit is contained in:
14
README.md
14
README.md
@@ -215,6 +215,13 @@ Additional steps:
|
||||
|
||||
#### Command Reference
|
||||
|
||||
- [export_blocks_and_transactions.py](#export_blocks_and_transactionspy)
|
||||
- [export_erc20_transfers.py](#export_erc20_transferspy)
|
||||
- [export_receipts_and_logs.py](#export_receipts_and_logspy)
|
||||
- [export_contracts.py](#export_contractspy)
|
||||
- [export_erc20_tokens.py](#export_erc20_tokenspy)
|
||||
- [get_block_range_for_date.py](#get_block_range_for_datepy)
|
||||
|
||||
All the commands accept the `-h` parameter for help, e.g.:
|
||||
|
||||
```bash
|
||||
@@ -342,6 +349,13 @@ which need to be deduplicated (see Querying in Google BigQuery section).
|
||||
Upvote this pull request to make erc20_tokens export faster
|
||||
https://github.com/ethereum/web3.py/pull/944#issuecomment-403957468
|
||||
|
||||
##### get_block_range_for_date.py
|
||||
|
||||
```bash
|
||||
> python get_block_range_for_date.py --provider-uri=https://mainnet.infura.io/ --date 2018-01-01
|
||||
4832686,4838611
|
||||
```
|
||||
|
||||
#### Running Tests
|
||||
|
||||
```bash
|
||||
|
||||
58
ethereumetl/service/eth_service.py
Normal file
58
ethereumetl/service/eth_service.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ethereumetl.service.graph_operations import GraphOperations, OutOfBoundsError, Point
|
||||
|
||||
|
||||
class EthService(object):
    """Maps UTC calendar days and Unix timestamps to ranges of block numbers.

    Lookups are delegated to a ``GraphOperations`` instance built over a
    ``BlockTimestampGraph`` of the supplied web3 client.
    """

    def __init__(self, web3):
        """Wrap ``web3`` in a block-number/timestamp graph used for all lookups."""
        graph = BlockTimestampGraph(web3)
        self._graph_operations = GraphOperations(graph)

    def get_block_range_for_date(self, date):
        """Return ``(start_block, end_block)`` for the UTC day of ``date``.

        The day spans from ``datetime.min.time()`` to ``datetime.max.time()``
        in UTC; ``datetime.combine`` only uses the date part of ``date``.

        Raises whatever ``get_block_range_for_timestamps`` raises.
        """
        start_datetime = datetime.combine(date, datetime.min.time(), tzinfo=timezone.utc)
        end_datetime = datetime.combine(date, datetime.max.time(), tzinfo=timezone.utc)
        return self.get_block_range_for_timestamps(start_datetime.timestamp(), end_datetime.timestamp())

    def get_block_range_for_timestamps(self, start_timestamp, end_timestamp):
        """Return ``(start_block, end_block)`` for blocks within the timestamp range.

        Both timestamps are truncated to integers before use.

        Raises:
            ValueError: if ``start_timestamp`` is after ``end_timestamp``, or
                if both timestamps fall between the same two adjacent blocks.
            OutOfBoundsError: if ``end_timestamp`` is outside the graph
                (propagated from the bounds lookup).
        """
        start_timestamp = int(start_timestamp)
        end_timestamp = int(end_timestamp)
        if start_timestamp > end_timestamp:
            # The guard rejects start > end, so the message must state the
            # requirement that start is not after end.
            raise ValueError('start_timestamp must be less than or equal to end_timestamp')

        try:
            start_block_bounds = self._graph_operations.get_bounds_for_y_coordinate(start_timestamp)
        except OutOfBoundsError:
            # An out-of-range start is clamped to block 0; an out-of-range end
            # is deliberately left to raise below.
            start_block_bounds = (0, 0)

        end_block_bounds = self._graph_operations.get_bounds_for_y_coordinate(end_timestamp)

        # Identical, non-degenerate bounds mean both timestamps sit strictly
        # between the same pair of blocks, so no block lies inside the range.
        if start_block_bounds == end_block_bounds and start_block_bounds[0] != start_block_bounds[1]:
            raise ValueError('The given timestamp range does not cover any blocks')

        # First block at/after the start and last block at/before the end.
        start_block = start_block_bounds[1]
        end_block = end_block_bounds[0]

        # The genesis block has timestamp 0 but we include it with the 1st block.
        if start_block == 1:
            start_block = 0

        return start_block, end_block
|
||||
|
||||
|
||||
class BlockTimestampGraph(object):
    """Graph view of the chain whose points are built from blocks via ``block_to_point``."""

    def __init__(self, web3):
        """Keep the web3 client used to fetch blocks."""
        self._web3 = web3

    def get_first_point(self):
        """Return the point for block 1."""
        # Ignore the genesis block as its timestamp is 0
        first_block = self._web3.eth.getBlock(1)
        return block_to_point(first_block)

    def get_last_point(self):
        """Return the point for the block web3 reports as 'latest'."""
        latest_block = self._web3.eth.getBlock('latest')
        return block_to_point(latest_block)

    def get_point(self, x):
        """Return the point for the block identified by ``x``."""
        block = self._web3.eth.getBlock(x)
        return block_to_point(block)
|
||||
|
||||
|
||||
def block_to_point(block):
    """Convert a block into a ``Point`` with x = block number and y = block timestamp."""
    number, timestamp = block.number, block.timestamp
    return Point(number, timestamp)
|
||||
118
ethereumetl/service/graph_operations.py
Normal file
118
ethereumetl/service/graph_operations.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from ethereumetl.utils import pairwise
|
||||
|
||||
|
||||
class GraphOperations(object):
    """Finds the x coordinates that bracket a given y value on a graph.

    Every point fetched from the graph is remembered, so later searches can
    start from the tightest interval already known.
    """

    def __init__(self, graph):
        """x axis on the graph must be integers, y value must increase strictly monotonically with increase of x"""
        self._graph = graph
        self._cached_points = []

    def get_bounds_for_y_coordinate(self, y):
        """Return the pair of x coordinates of the closest points bounding ``y``.

        Both elements are equal when ``y`` matches a point's y coordinate
        exactly. Raises ``OutOfBoundsError`` when ``y`` lies outside the graph.
        """
        seed = find_best_bounds(y, self._cached_points)
        if seed is None:
            # Nothing cached brackets y yet: start from the whole graph.
            seed = self._get_first_point(), self._get_last_point()

        left, right = seed
        return self._get_bounds_for_y_coordinate_recursive(y, left, right)

    def _get_bounds_for_y_coordinate_recursive(self, y, start, end):
        """Narrow the interval ``start``..``end`` around ``y`` until it is exact or adjacent."""
        if y < start.y or y > end.y:
            raise OutOfBoundsError('y coordinate {} is out of bounds for points {}-{}'.format(y, start, end))

        # Terminal cases: exact hit on either end, or nothing left in between.
        if y == start.y:
            return start.x, start.x
        if y == end.y:
            return end.x, end.x
        if (end.x - start.x) <= 1:
            return start.x, end.x

        assert start.y < y < end.y
        if start.y >= end.y:
            raise ValueError('y must increase strictly monotonically')

        # Two probes per step. The first estimate is a linear interpolation
        # between start and end. If it lands below y, the second estimate
        # extends the line through start and the first estimate up to y
        # (likely landing above y); otherwise the second estimate uses the
        # line through the first estimate and end (likely landing below y).
        # Original author's note: still runs in log(n) time but about 2-3
        # times faster than the naive dichotomy method.
        first_guess_x = bound(interpolate(start, end, y), (start.x, end.x))
        first_guess = self._get_point(first_guess_x)

        if first_guess.y < y:
            anchors = (start, first_guess)
        else:
            anchors = (first_guess, end)

        second_guess_x = bound(interpolate(*anchors, y), (start.x, end.x))
        second_guess = self._get_point(second_guess_x)

        bounds = find_best_bounds(y, [start, first_guess, second_guess, end])
        if bounds is None:
            raise ValueError('Unable to find bounds for points {} and y coordinate {}'.format(anchors, y))

        return self._get_bounds_for_y_coordinate_recursive(y, *bounds)

    def _remember(self, point):
        """Append ``point`` to the cache and hand it back."""
        self._cached_points.append(point)
        return point

    def _get_point(self, x):
        """Fetch the point at ``x`` from the graph and cache it."""
        return self._remember(self._graph.get_point(x))

    def _get_first_point(self):
        """Fetch the graph's first point and cache it."""
        return self._remember(self._graph.get_first_point())

    def _get_last_point(self):
        """Fetch the graph's last point and cache it."""
        return self._remember(self._graph.get_last_point())
|
||||
|
||||
|
||||
def find_best_bounds(y, points):
    """Return the first pair of y-adjacent points that brackets ``y`` (inclusive), or None.

    ``points`` are ordered by their y coordinate before neighbouring pairs
    are examined.
    """
    ordered = sorted(points, key=lambda point: point.y)
    bracketing = (
        (lower, upper)
        for lower, upper in pairwise(ordered)
        if lower.y <= y <= upper.y
    )
    return next(bracketing, None)
|
||||
|
||||
|
||||
def interpolate(point1, point2, y):
    """Return the x at height ``y`` on the line through both points, truncated to int.

    Raises ``ValueError`` when the two points share the same y coordinate.
    """
    if point1.y == point2.y:
        raise ValueError('The y coordinate for points is the same {}, {}'.format(point1, point2))

    run = point2.x - point1.x
    rise = point2.y - point1.y
    return int((y - point1.y) * run / rise + point1.x)
|
||||
|
||||
|
||||
def bound(x, bounds):
|
||||
x1, x2 = bounds
|
||||
if x1 > x2:
|
||||
x1, x2 = x2, x1
|
||||
if x <= x1:
|
||||
return x1 + 1
|
||||
elif x >= x2:
|
||||
return x2 - 1
|
||||
else:
|
||||
return x
|
||||
|
||||
|
||||
class OutOfBoundsError(Exception):
    """Raised when a requested y coordinate lies outside the searched interval."""
|
||||
|
||||
|
||||
class Point(object):
    """A point on a graph, holding an ``x`` and a ``y`` coordinate."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __str__(self):
        return '({},{})'.format(self.x, self.y)

    def __repr__(self):
        # Containers render their items with repr(); without this, a tuple of
        # points formatted into an error message shows opaque object addresses.
        return 'Point(x={!r}, y={!r})'.format(self.x, self.y)
|
||||
@@ -1,3 +1,6 @@
|
||||
import itertools
|
||||
|
||||
|
||||
def hex_to_dec(hex_string):
|
||||
if hex_string is None:
|
||||
return None
|
||||
@@ -44,3 +47,10 @@ def dynamic_batch_iterator(iterable, batch_size_getter):
|
||||
batch_size = batch_size_getter()
|
||||
if len(batch) > 0:
|
||||
yield batch
|
||||
|
||||
|
||||
def pairwise(iterable):
    """Return an iterator of overlapping neighbours: s -> (s0,s1), (s1,s2), (s2, s3), ..."""
    leading, trailing = itertools.tee(iterable, 2)
    # Advance the second copy by one item so zip pairs each element with its successor.
    next(trailing, None)
    return zip(leading, trailing)
|
||||
|
||||
15
ethereumetl/web3_utils.py
Normal file
15
ethereumetl/web3_utils.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from web3 import IPCProvider, HTTPProvider
|
||||
|
||||
DEFAULT_IPC_TIMEOUT = 60
|
||||
|
||||
|
||||
def get_provider_from_uri(uri_string):
    """Build the web3 provider that matches the scheme of ``uri_string``.

    ``file`` URIs yield an ``IPCProvider`` on the URI path (using
    ``DEFAULT_IPC_TIMEOUT``); ``http``/``https`` URIs yield an
    ``HTTPProvider`` on the full URI. Any other scheme raises ``ValueError``.
    """
    parsed = urlparse(uri_string)
    scheme = parsed.scheme

    if scheme == 'file':
        return IPCProvider(parsed.path, timeout=DEFAULT_IPC_TIMEOUT)
    if scheme in ('http', 'https'):
        return HTTPProvider(uri_string)
    raise ValueError('Unknown uri scheme {}'.format(uri_string))
|
||||
@@ -5,7 +5,7 @@ from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.file_utils import smart_open
|
||||
|
||||
parser = argparse.ArgumentParser(description='Extracts a single column from a given csv file.')
|
||||
parser.add_argument('-i', '--input', default=None, type=str, help='The input file. If not specified stdin is used.')
|
||||
parser.add_argument('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
parser.add_argument('-c', '--column', required=True, type=str, help='The csv column name to extract.')
|
||||
|
||||
|
||||
26
get_block_range_for_date.py
Normal file
26
get_block_range_for_date.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.web3_utils import get_provider_from_uri
|
||||
|
||||
# Command-line entry point: resolve the block range for one calendar day and
# write it as "start_block,end_block" to the chosen output.
parser = argparse.ArgumentParser(description='Outputs the start block and end block for a given date.')
# The provider URI is mandatory. A missing value used to reach
# get_provider_from_uri as None and fail there with an unknown-scheme
# ValueError; argparse now reports the omission up front instead.
parser.add_argument('-p', '--provider-uri', required=True, type=str,
                    help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc. or https://mainnet.infura.io/')
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
parser.add_argument('-d', '--date', required=True, type=lambda d: datetime.strptime(d, '%Y-%m-%d'),
                    help='The date e.g. 2018-01-01.')

args = parser.parse_args()

provider = get_provider_from_uri(args.provider_uri)
web3 = Web3(provider)
eth_service = EthService(web3)

start_block, end_block = eth_service.get_block_range_for_date(args.date)

with smart_open(args.output, 'w') as output_file:
    output_file.write('{},{}'.format(start_block, end_block))
|
||||
@@ -1,2 +1,3 @@
|
||||
web3~=4.3.0
|
||||
pytest~=3.2.0
|
||||
python-dateutil~=2.7.0
|
||||
|
||||
64
tests/ethereumetl/service/test_eth_service.py
Normal file
64
tests/ethereumetl/service/test_eth_service.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from dateutil.parser import parse
|
||||
from web3 import HTTPProvider, Web3
|
||||
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.service.graph_operations import OutOfBoundsError
|
||||
|
||||
# Tests marked with skip_slow_tests are opt-in: they only run when
# ETHEREUM_ETL_RUN_SLOW_TESTS is set to '1'.
run_slow_tests = os.getenv('ETHEREUM_ETL_RUN_SLOW_TESTS') == '1'
skip_slow_tests = pytest.mark.skipif(not run_slow_tests, reason='Slow running tests')
|
||||
|
||||
|
||||
@skip_slow_tests
@pytest.mark.parametrize("date,expected_start_block,expected_end_block", [
    ('2015-07-30', 0, 6911),
    ('2015-07-31', 6912, 13774),
    ('2017-01-01', 2912407, 2918517),
    ('2017-01-02', 2918518, 2924575),
    ('2018-06-10', 5761663, 5767303),
])
def test_get_block_range_for_date(date, expected_start_block, expected_end_block):
    """Each date must resolve to the expected (start_block, end_block) pair."""
    service = get_new_eth_service()
    actual_range = service.get_block_range_for_date(parse(date))
    assert actual_range == (expected_start_block, expected_end_block)
|
||||
|
||||
|
||||
@skip_slow_tests
@pytest.mark.parametrize("date", [
    '2015-07-29',
    '2030-01-01',
])
def test_get_block_range_for_date_fail(date):
    """Dates expected to fall outside the available blocks must raise OutOfBoundsError."""
    service = get_new_eth_service()
    day = parse(date)
    with pytest.raises(OutOfBoundsError):
        service.get_block_range_for_date(day)
|
||||
|
||||
|
||||
@skip_slow_tests
@pytest.mark.parametrize("start_timestamp,end_timestamp,expected_start_block,expected_end_block", [
    (1438270128, 1438270128, 10, 10),
    (1438270128, 1438270129, 10, 10),
])
def test_get_block_range_for_timestamps(start_timestamp, end_timestamp, expected_start_block, expected_end_block):
    """Timestamp ranges must resolve to the expected (start_block, end_block) pair."""
    service = get_new_eth_service()
    actual_range = service.get_block_range_for_timestamps(start_timestamp, end_timestamp)
    assert actual_range == (expected_start_block, expected_end_block)
|
||||
|
||||
|
||||
@skip_slow_tests
@pytest.mark.parametrize("start_timestamp,end_timestamp", [
    (1438270129, 1438270131),
])
def test_get_block_range_for_timestamps_fail(start_timestamp, end_timestamp):
    """A timestamp range expected to contain no block must raise ValueError."""
    service = get_new_eth_service()
    with pytest.raises(ValueError):
        service.get_block_range_for_timestamps(start_timestamp, end_timestamp)
|
||||
|
||||
|
||||
def get_new_eth_service():
    """Create a fresh EthService backed by the https://mainnet.infura.io/ HTTP endpoint."""
    provider = HTTPProvider('https://mainnet.infura.io/')
    return EthService(Web3(provider))
|
||||
Reference in New Issue
Block a user