mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-12 23:28:02 -05:00
Compare commits
203 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
51927defc7 | ||
|
|
4b92d7b670 | ||
|
|
0c4342fe11 | ||
|
|
41f20435a2 | ||
|
|
676dfb22c5 | ||
|
|
7cf0f34785 | ||
|
|
d46528ba24 | ||
|
|
87e6b57024 | ||
|
|
70db781856 | ||
|
|
f5836345cd | ||
|
|
d7ac8fb758 | ||
|
|
ded7a6a007 | ||
|
|
dd8d2bdc38 | ||
|
|
093fe56dde | ||
|
|
68fce399a8 | ||
|
|
438b911b0f | ||
|
|
dae8deff36 | ||
|
|
cb84071680 | ||
|
|
d882c64671 | ||
|
|
477eb35a39 | ||
|
|
ab7fd89774 | ||
|
|
064353a993 | ||
|
|
94b7ce8a4c | ||
|
|
c8e4c840d5 | ||
|
|
136ed3232a | ||
|
|
cf8c6edfb7 | ||
|
|
e90e70e94f | ||
|
|
64614c2670 | ||
|
|
1ffb592771 | ||
|
|
030c460f36 | ||
|
|
92db79b8a7 | ||
|
|
2d37486970 | ||
|
|
499596ad3e | ||
|
|
106de42844 | ||
|
|
aa106467b8 | ||
|
|
e53dbe13f9 | ||
|
|
38752a557a | ||
|
|
e8b6fe742e | ||
|
|
d1e2f83071 | ||
|
|
69c64e048e | ||
|
|
1d4aa94d81 | ||
|
|
2b23e08a64 | ||
|
|
7434d149bb | ||
|
|
eab288d507 | ||
|
|
091c7edd60 | ||
|
|
0373f48956 | ||
|
|
32eae84170 | ||
|
|
359fe17ac3 | ||
|
|
19daa86e52 | ||
|
|
e428bead6d | ||
|
|
ee5de4b465 | ||
|
|
ee8c68d215 | ||
|
|
76cdec4a5c | ||
|
|
7d9892de85 | ||
|
|
faffca21ef | ||
|
|
a74ab02563 | ||
|
|
8daa06d007 | ||
|
|
2ab3b7e9bf | ||
|
|
3234f64c45 | ||
|
|
437718083e | ||
|
|
0f28aee915 | ||
|
|
5e311b87da | ||
|
|
fdea8ca36e | ||
|
|
ca8cd55223 | ||
|
|
f4586b1501 | ||
|
|
f49b46363e | ||
|
|
40d4cf374c | ||
|
|
031c5acedf | ||
|
|
f4718a6cb9 | ||
|
|
f35b4ecde4 | ||
|
|
8257c4bde5 | ||
|
|
8b21e34250 | ||
|
|
e8ea43067a | ||
|
|
e695c55704 | ||
|
|
5c941a403e | ||
|
|
67b9ef1728 | ||
|
|
3d5c5a3c73 | ||
|
|
fa81a41ae5 | ||
|
|
fcd963ced6 | ||
|
|
e69148ca9e | ||
|
|
143f59018f | ||
|
|
b46717bf2b | ||
|
|
66971c82e8 | ||
|
|
040a42dba5 | ||
|
|
2e0b59553c | ||
|
|
26bcb6c9d8 | ||
|
|
e82618d1c2 | ||
|
|
e6c055c3fa | ||
|
|
925471b064 | ||
|
|
af72640c37 | ||
|
|
a44637f430 | ||
|
|
a446b55453 | ||
|
|
9072abf55d | ||
|
|
c6118be5a5 | ||
|
|
4ed17d4980 | ||
|
|
1bf2553aed | ||
|
|
04b34c5dd5 | ||
|
|
9614aeba7f | ||
|
|
eba4e4e58e | ||
|
|
c5d155b617 | ||
|
|
418b7a83d3 | ||
|
|
4fccd2c181 | ||
|
|
f07752907a | ||
|
|
140af3e649 | ||
|
|
c9fa2a1873 | ||
|
|
7214d771b9 | ||
|
|
a2a48f9642 | ||
|
|
ad8fda002e | ||
|
|
99803a772e | ||
|
|
1defa289e5 | ||
|
|
7f725182aa | ||
|
|
7afe6093b0 | ||
|
|
4465222622 | ||
|
|
2f8d901829 | ||
|
|
e27b5c28fd | ||
|
|
47bd5957d4 | ||
|
|
edc3211544 | ||
|
|
a9ee19f871 | ||
|
|
c5ea25a200 | ||
|
|
81033022b9 | ||
|
|
ac60502f72 | ||
|
|
9dfff1261d | ||
|
|
69cc8a70c0 | ||
|
|
ba60c906f5 | ||
|
|
751f9b57ac | ||
|
|
a9672ac9c1 | ||
|
|
ea6d0e87da | ||
|
|
22e6795789 | ||
|
|
302fbc9947 | ||
|
|
3483d77aa4 | ||
|
|
871af57840 | ||
|
|
c76d25bf3f | ||
|
|
2c3ece7010 | ||
|
|
930efe5a0e | ||
|
|
aac00bf7d0 | ||
|
|
6f19ff0756 | ||
|
|
f18f303fa9 | ||
|
|
b5e290e2c1 | ||
|
|
a10fb2fac9 | ||
|
|
83a7b5383f | ||
|
|
978513efc0 | ||
|
|
65f5de1df1 | ||
|
|
df10702486 | ||
|
|
a288b51b73 | ||
|
|
a6337d0817 | ||
|
|
d63713ece1 | ||
|
|
ed2466d16d | ||
|
|
aab657da9b | ||
|
|
79b9a46bae | ||
|
|
cac7305f53 | ||
|
|
80cd37bdde | ||
|
|
ff4218c0b8 | ||
|
|
f50cc7253b | ||
|
|
4fc495342b | ||
|
|
b0a5e02dd5 | ||
|
|
f7af95d6c7 | ||
|
|
706eb8a9c9 | ||
|
|
e30e58f032 | ||
|
|
3b866f4f32 | ||
|
|
d437f58eb9 | ||
|
|
ecea237187 | ||
|
|
aa1a0ee32a | ||
|
|
4c3d67d442 | ||
|
|
061f131919 | ||
|
|
1e793f3d48 | ||
|
|
3876957917 | ||
|
|
76879e593d | ||
|
|
f9b353d803 | ||
|
|
fb2c7fb149 | ||
|
|
21808fb1c8 | ||
|
|
a4a15cb534 | ||
|
|
04aa34dca4 | ||
|
|
5c98d95a5a | ||
|
|
49faafa3e0 | ||
|
|
eb69307ddb | ||
|
|
c8202d9533 | ||
|
|
01c1792ca5 | ||
|
|
32e7f593be | ||
|
|
538d841906 | ||
|
|
3050f50893 | ||
|
|
49c6f042d7 | ||
|
|
320f592e51 | ||
|
|
c0c8fd5845 | ||
|
|
7b9276c5a2 | ||
|
|
e5e15b262d | ||
|
|
4092ce92b9 | ||
|
|
819f26e09e | ||
|
|
b500542437 | ||
|
|
652193a2f2 | ||
|
|
7ecdfa4fb7 | ||
|
|
10e95f19d0 | ||
|
|
da68fe948b | ||
|
|
cc3ed86f3b | ||
|
|
60017a5abe | ||
|
|
8cc869694d | ||
|
|
3fbf70fb4f | ||
|
|
f7e7e55441 | ||
|
|
d677d442bd | ||
|
|
7a47d93d9e | ||
|
|
e102f76631 | ||
|
|
9bd9d4347b | ||
|
|
54494aef6c | ||
|
|
c4c3ccc79a |
4
.dockerignore
Normal file
4
.dockerignore
Normal file
@@ -0,0 +1,4 @@
|
||||
.*
|
||||
last_synced_block.txt
|
||||
pid.txt
|
||||
output
|
||||
4
.github/FUNDING.yml
vendored
Normal file
4
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
custom: https://gitcoin.co/grants/233/ethereumetl
|
||||
|
||||
15
.travis.yml
15
.travis.yml
@@ -1,7 +1,14 @@
|
||||
language: python
|
||||
python:
|
||||
- "3.6"
|
||||
dist: xenial
|
||||
matrix:
|
||||
include:
|
||||
- python: "3.5"
|
||||
env: TOX_POSARGS="-e py35"
|
||||
- python: "3.6"
|
||||
env: TOX_POSARGS="-e py36"
|
||||
- python: "3.7"
|
||||
env: TOX_POSARGS="-e py37"
|
||||
install:
|
||||
- travis_retry pip install -r requirements.txt
|
||||
- travis_retry pip install tox
|
||||
script:
|
||||
- pytest -vv
|
||||
- tox $TOX_POSARGS
|
||||
15
Dockerfile
15
Dockerfile
@@ -1,12 +1,15 @@
|
||||
FROM python:3.6-alpine
|
||||
MAINTAINER Eric Lim <elim0322@gmail.com>
|
||||
FROM python:3.6
|
||||
MAINTAINER Evgeny Medvedev <evge.medvedev@gmail.com>
|
||||
ENV PROJECT_DIR=ethereum-etl
|
||||
|
||||
RUN mkdir /$PROJECT_DIR
|
||||
WORKDIR /$PROJECT_DIR
|
||||
COPY requirements.txt .
|
||||
RUN apk add --no-cache gcc musl-dev #for C libraries: <limits.h> <stdio.h>
|
||||
RUN pip install --upgrade pip && pip install -r /$PROJECT_DIR/requirements.txt
|
||||
COPY . .
|
||||
RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/[streaming]
|
||||
|
||||
ENTRYPOINT ["python", "export_all.py"]
|
||||
# Add Tini
|
||||
ENV TINI_VERSION v0.18.0
|
||||
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
|
||||
RUN chmod +x /tini
|
||||
|
||||
ENTRYPOINT ["/tini", "--", "python", "ethereumetl"]
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
555
README.md
555
README.md
@@ -1,544 +1,107 @@
|
||||
# Ethereum ETL
|
||||
|
||||
[](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://travis-ci.org/blockchain-etl/ethereum-etl)
|
||||
[Join Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
[](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
[](https://discord.gg/wukrezR)
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
*Do you just want to query Ethereum data right away? Use the [public dataset in BigQuery](https://console.cloud.google.com/marketplace/details/ethereum/crypto-ethereum-blockchain).*
|
||||
|
||||
[Full documentation available here](http://ethereum-etl.readthedocs.io/).
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install Ethereum ETL:
|
||||
|
||||
```bash
|
||||
pip install ethereum-etl
|
||||
pip3 install ethereum-etl
|
||||
```
|
||||
|
||||
Export blocks and transactions ([Schema](#blockscsv), [Reference](#export_blocks_and_transactions)):
|
||||
Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](docs/commands.md#export_blocks_and_transactions)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers ([Schema](#token_transferscsv), [Reference](#export_token_transfers)):
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md##export_token_transfers)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
|
||||
```
|
||||
|
||||
Export receipts and logs ([Schema](#receiptscsv), [Reference](#export_receipts_and_logs)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri https://mainnet.infura.io --receipts-output receipts.csv --logs-output logs.csv
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 token details ([Schema](#tokenscsv), [Reference](#export_tokens)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_tokens --token-addresses token_addresses.csv \
|
||||
--provider-uri https://mainnet.infura.io --output tokens.csv
|
||||
```
|
||||
|
||||
Export traces ([Schema](#tracescsv), [Reference](#export_traces)):
|
||||
Export traces ([Schema](docs/schema.md#tracescsv), [Reference](docs/commands.md#export_traces)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Stream blocks, transactions, logs, token_transfers continually to console ([Reference](docs/commands.md#stream)):
|
||||
|
||||
```bash
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt
|
||||
```
|
||||
|
||||
Find other commands [here](https://ethereum-etl.readthedocs.io/en/latest/commands/).
|
||||
|
||||
For the latest version, check out the repo and call
|
||||
```bash
|
||||
> pip install -e .
|
||||
> python ethereumetl.py
|
||||
> pip3 install -e .
|
||||
> python3 ethereumetl.py
|
||||
```
|
||||
|
||||
[LIMITATIONS](#limitations)
|
||||
## Useful Links
|
||||
|
||||
## Table of Contents
|
||||
- [Schema](https://ethereum-etl.readthedocs.io/en/latest/schema/)
|
||||
- [Command Reference](https://ethereum-etl.readthedocs.io/en/latest/commands/)
|
||||
- [Documentation](https://ethereum-etl.readthedocs.io/)
|
||||
- [Exporting the Blockchain](https://ethereum-etl.readthedocs.io/en/latest/exporting-the-blockchain/)
|
||||
- [Querying in Amazon Athena](https://ethereum-etl.readthedocs.io/en/latest/amazon-athena/)
|
||||
- [Querying in Google BigQuery](https://ethereum-etl.readthedocs.io/en/latest/google-bigquery/)
|
||||
- [Querying in Kaggle](https://www.kaggle.com/bigquery/ethereum-blockchain)
|
||||
- [Airflow DAGs](https://github.com/blockchain-etl/ethereum-etl-airflow)
|
||||
- [Postgres ETL](https://github.com/blockchain-etl/ethereum-etl-postgresql)
|
||||
|
||||
- [Schema](#schema)
|
||||
- [blocks.csv](#blockscsv)
|
||||
- [transactions.csv](#transactionscsv)
|
||||
- [token_transfers.csv](#token_transferscsv)
|
||||
- [receipts.csv](#receiptscsv)
|
||||
- [logs.csv](#logscsv)
|
||||
- [contracts.csv](#contractscsv)
|
||||
- [tokens.csv](#tokenscsv)
|
||||
- [traces.csv](#tracescsv)
|
||||
- [Exporting the Blockchain](#exporting-the-blockchain)
|
||||
- [Export in 2 Hours](#export-in-2-hours)
|
||||
- [Command Reference](#command-reference)
|
||||
- [Querying in Amazon Athena](#querying-in-amazon-athena)
|
||||
- [Querying in Google BigQuery](#querying-in-google-bigquery)
|
||||
- [Public Dataset](#public-dataset)
|
||||
|
||||
|
||||
## Schema
|
||||
|
||||
### blocks.csv
|
||||
|
||||
Column | Type |
|
||||
------------------------|--------------------|
|
||||
number | bigint |
|
||||
hash | hex_string |
|
||||
parent_hash | hex_string |
|
||||
nonce | hex_string |
|
||||
sha3_uncles | hex_string |
|
||||
logs_bloom | hex_string |
|
||||
transactions_root | hex_string |
|
||||
state_root | hex_string |
|
||||
receipts_root | hex_string |
|
||||
miner | address |
|
||||
difficulty | numeric |
|
||||
total_difficulty | numeric |
|
||||
size | bigint |
|
||||
extra_data | hex_string |
|
||||
gas_limit | bigint |
|
||||
gas_used | bigint |
|
||||
timestamp | bigint |
|
||||
transaction_count | bigint |
|
||||
|
||||
### transactions.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
hash | hex_string |
|
||||
nonce | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
transaction_index| bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
gas | bigint |
|
||||
gas_price | bigint |
|
||||
input | hex_string |
|
||||
|
||||
### token_transfers.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
token_address | address |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
transaction_hash | hex_string |
|
||||
log_index | bigint |
|
||||
block_number | bigint |
|
||||
|
||||
### receipts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
cumulative_gas_used | bigint |
|
||||
gas_used | bigint |
|
||||
contract_address | address |
|
||||
root | hex_string |
|
||||
status | bigint |
|
||||
|
||||
### logs.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
log_index | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
address | address |
|
||||
data | hex_string |
|
||||
topics | string |
|
||||
|
||||
### contracts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
bytecode | hex_string |
|
||||
function_sighashes | string |
|
||||
is_erc20 | boolean |
|
||||
is_erc721 | boolean |
|
||||
|
||||
### tokens.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
symbol | string |
|
||||
name | string |
|
||||
decimals | bigint |
|
||||
total_supply | numeric |
|
||||
|
||||
### traces.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
block_number | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
input | hex_string |
|
||||
output | hex_string |
|
||||
trace_type | string |
|
||||
call_type | string |
|
||||
reward_type | string |
|
||||
gas | bigint |
|
||||
gas_used | bigint |
|
||||
subtraces | bigint |
|
||||
trace_address | string |
|
||||
error | string |
|
||||
|
||||
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
|
||||
|
||||
Note: for the `address` type all hex characters are lower-cased.
|
||||
`boolean` type can have 2 values: `True` or `False`.
|
||||
|
||||
## LIMITATIONS
|
||||
|
||||
- `contracts.csv` and `tokens.csv` files don’t include contracts created by message calls (a.k.a. internal transactions).
|
||||
We are working on adding support for those.
|
||||
- In case the contract is a proxy, which forwards all calls to a delegate, interface detection doesn’t work,
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also some contracts (EOS) implement these methods but with wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
`cast(value as FLOAT64)` (possible loss of precision) or
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- The contracts that don't implement `decimals()` function but have the
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
|
||||
### Differences between geth and parity traces.csv
|
||||
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is same as `to_address` of parent call);
|
||||
- geth output doesn't have `reward` traces;
|
||||
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide traces;
|
||||
- `error` field contains human readable error message, which might differ in geth/parity output;
|
||||
- geth output doesn't have `transaction_hash`;
|
||||
- `gas_used` is 0 on traces with error in geth, empty in parity;
|
||||
- zero output of subcalls is `0x000...` in geth, `0x` in parity;
|
||||
|
||||
|
||||
## Exporting the Blockchain
|
||||
|
||||
1. Install python 3.6 https://www.python.org/downloads/ (3.5 and 3.7 are not supported by this tool for now)
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
|
||||
For that use `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
|
||||
export the data ~40 times faster, you will need to set up a local Ethereum node:
|
||||
|
||||
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
|
||||
|
||||
1. Start geth.
|
||||
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
|
||||
You can export blocks below `currentBlock`,
|
||||
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
|
||||
and token details; for those you need to wait until the full sync).
|
||||
|
||||
1. Install Ethereum ETL:
|
||||
|
||||
```bash
|
||||
> pip install ethereum-etl
|
||||
```
|
||||
|
||||
1. Export all:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_all --help
|
||||
> ethereumetl export_all -s 0 -e 5999999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
|
||||
```
|
||||
|
||||
The result will be in the `output` subdirectory, partitioned in Hive style:
|
||||
|
||||
```bash
|
||||
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
|
||||
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
|
||||
...
|
||||
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
|
||||
...
|
||||
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
|
||||
...
|
||||
```
|
||||
|
||||
Should work with geth and parity, on Linux, Mac, Windows.
|
||||
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
|
||||
does not place all of the block or receipt data into the database https://wiki.parity.io/Getting-Synced
|
||||
Tested with Python 3.6, geth 1.8.7, Ubuntu 16.04.4
|
||||
|
||||
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
|
||||
check this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
|
||||
|
||||
### Export in 2 Hours
|
||||
|
||||
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
|
||||
Read this article for details https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2
|
||||
|
||||
### Running in Docker
|
||||
|
||||
1. Install Docker https://docs.docker.com/install/
|
||||
|
||||
1. Build a docker image
|
||||
```bash
|
||||
> docker build -t ethereum-etl:latest .
|
||||
> docker image ls
|
||||
```
|
||||
|
||||
1. Run a container out of the image
|
||||
```bash
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 2018-01-01 -e 2018-01-01 -b 100000 -p https://mainnet.infura.io
|
||||
```
|
||||
|
||||
### Command Reference
|
||||
|
||||
- [export_blocks_and_transactions](#export_blocks_and_transactions)
|
||||
- [export_token_transfers](#export_token_transfers)
|
||||
- [extract_token_transfers](#extract_token_transfers)
|
||||
- [export_receipts_and_logs](#export_receipts_and_logs)
|
||||
- [export_contracts](#export_contracts)
|
||||
- [export_tokens](#export_tokens)
|
||||
- [export_traces](#export_traces)
|
||||
- [export_geth_traces](#export_geth_traces)
|
||||
- [extract_geth_traces](#extract_geth_traces)
|
||||
- [get_block_range_for_date](#get_block_range_for_date)
|
||||
- [get_keccak_hash](#get_keccak_hash)
|
||||
|
||||
All the commands accept `-h` parameter for help, e.g.:
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions -h
|
||||
|
||||
Usage: ethereumetl export_blocks_and_transactions [OPTIONS]
|
||||
|
||||
Export blocks and transactions.
|
||||
|
||||
Options:
|
||||
-s, --start-block INTEGER Start block
|
||||
-e, --end-block INTEGER End block [required]
|
||||
-b, --batch-size INTEGER The number of blocks to export at a time.
|
||||
-p, --provider-uri TEXT The URI of the web3 provider e.g.
|
||||
file://$HOME/Library/Ethereum/geth.ipc or
|
||||
https://mainnet.infura.io
|
||||
-w, --max-workers INTEGER The maximum number of workers.
|
||||
--blocks-output TEXT The output file for blocks. If not provided
|
||||
blocks will not be exported. Use "-" for stdout
|
||||
--transactions-output TEXT The output file for transactions. If not
|
||||
provided transactions will not be exported. Use
|
||||
"-" for stdout
|
||||
-h, --help Show this message and exit.
|
||||
```
|
||||
|
||||
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
|
||||
|
||||
#### export_blocks_and_transactions
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc \
|
||||
--blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### export_token_transfers
|
||||
|
||||
The API used in this command is not supported by Infura, so you will need a local node.
|
||||
If you want to use Infura for exporting ERC20 transfers refer to [extract_token_transfers](#extract_token_transfers)
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
|
||||
```
|
||||
|
||||
Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
|
||||
--tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 --tokens 0x06012c8cf97bead5deae237070f9587f8e7a266d
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### export_receipts_and_logs
|
||||
|
||||
First extract transaction hashes from `transactions.csv`
|
||||
(Exported with [export_blocks_and_transactions](#export_blocks_and_transactions)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input transactions.csv --column hash --output transaction_hashes.txt
|
||||
```
|
||||
|
||||
Then export receipts and logs:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
|
||||
```
|
||||
|
||||
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
Upvote this feature request https://github.com/paritytech/parity/issues/9075,
|
||||
it will make receipts and logs export much faster.
|
||||
|
||||
#### extract_token_transfers
|
||||
|
||||
First export receipt logs with [export_receipts_and_logs](#export_receipts_and_logs).
|
||||
|
||||
Then extract transfers from the logs.csv file:
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_token_transfers --logs logs.csv --output token_transfers.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### export_contracts
|
||||
|
||||
First extract contract addresses from `receipts.csv`
|
||||
(Exported with [export_receipts_and_logs](#export_receipts_and_logs)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input receipts.csv --column contract_address --output contract_addresses.txt
|
||||
```
|
||||
|
||||
Then export contracts:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_contracts --contract-addresses contract_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### export_tokens
|
||||
|
||||
First extract token addresses from `contracts.json`
|
||||
(Exported with [export_contracts](#export_contracts)):
|
||||
|
||||
```bash
|
||||
> ethereumetl filter_items -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
|
||||
ethereumetl extract_field -f address -o token_addresses.txt
|
||||
```
|
||||
|
||||
Then export ERC20 / ERC721 tokens:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_tokens --token-addresses token_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
|
||||
```
|
||||
|
||||
You can tune `--max-workers` for performance.
|
||||
|
||||
#### export_traces
|
||||
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Parity archive node (`parity --tracing on`).
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --batch-size 100 --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### export_geth_traces
|
||||
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --ipcapi debug`).
|
||||
When using rpc, add `--rpc --rpcapi debug` options.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_geth_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output geth_traces.json
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### extract_geth_traces
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_geth_traces --input geth_traces.json --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### get_block_range_for_date
|
||||
|
||||
```bash
|
||||
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io --date 2018-01-01
|
||||
4832686,4838611
|
||||
```
|
||||
|
||||
#### get_keccak_hash
|
||||
|
||||
```bash
|
||||
> ethereumetl get_keccak_hash -i "transfer(address,uint256)"
|
||||
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
|
||||
```
|
||||
|
||||
#### Running Tests
|
||||
|
||||
```bash
|
||||
> pip install -e . -r requirements.txt
|
||||
> pip3 install -e .[dev,streaming]
|
||||
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
|
||||
> pytest -vv
|
||||
```
|
||||
|
||||
## Querying in Amazon Athena
|
||||
|
||||
- Upload the files to S3:
|
||||
### Running Tox Tests
|
||||
|
||||
```bash
|
||||
> cd output
|
||||
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
|
||||
> pip3 install tox
|
||||
> tox
|
||||
```
|
||||
|
||||
- Sign in to Athena https://console.aws.amazon.com/athena/home
|
||||
## Running in Docker
|
||||
|
||||
- Create a database:
|
||||
1. Install Docker https://docs.docker.com/install/
|
||||
|
||||
```sql
|
||||
CREATE DATABASE ethereumetl;
|
||||
```
|
||||
2. Build a docker image
|
||||
|
||||
> docker build -t ethereum-etl:latest .
|
||||
> docker image ls
|
||||
|
||||
3. Run a container out of the image
|
||||
|
||||
- Create the tables:
|
||||
- blocks: [schemas/aws/blocks.sql](schemas/aws/blocks.sql)
|
||||
- transactions: [schemas/aws/transactions.sql](schemas/aws/transactions.sql)
|
||||
- token_transfers: [schemas/aws/token_transfers.sql](schemas/aws/token_transfers.sql)
|
||||
- contracts: [schemas/aws/contracts.sql](schemas/aws/contracts.sql)
|
||||
- receipts: [schemas/aws/receipts.sql](schemas/aws/receipts.sql)
|
||||
- logs: [schemas/aws/logs.sql](schemas/aws/logs.sql)
|
||||
- tokens: [schemas/aws/tokens.sql](schemas/aws/tokens.sql)
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 2018-01-01 -e 2018-01-01 -p https://mainnet.infura.io
|
||||
|
||||
### Tables for Parquet Files
|
||||
4. Run streaming to console or Pub/Sub
|
||||
|
||||
Read this article on how to convert CSVs to Parquet https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30
|
||||
|
||||
- Create the tables:
|
||||
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](schemas/aws/parquet/parquet_blocks.sql)
|
||||
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](schemas/aws/parquet/parquet_transactions.sql)
|
||||
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](schemas/aws/parquet/parquet_token_transfers.sql)
|
||||
|
||||
Note that DECIMAL type is limited to 38 digits in Hive https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal
|
||||
so values greater than 38 decimals will be null.
|
||||
|
||||
## Querying in Google BigQuery
|
||||
|
||||
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
|
||||
|
||||
### Public Dataset
|
||||
|
||||
You can query the data that's updated daily in the public BigQuery dataset
|
||||
https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579
|
||||
> docker build -t ethereum-etl:latest -f Dockerfile .
|
||||
> echo "Stream to console"
|
||||
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
|
||||
> echo "Stream to Pub/Sub"
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
|
||||
|
||||
0
blockchainetl/__init__.py
Normal file
0
blockchainetl/__init__.py
Normal file
@@ -20,11 +20,16 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import itertools
|
||||
|
||||
from ethereumetl.cli.export_all import export_all
|
||||
|
||||
print('========================================================================================')
|
||||
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
|
||||
print('========================================================================================')
|
||||
# https://stackoverflow.com/a/27062830/1580227
class AtomicCounter:
    """Monotonically increasing counter safe to call from multiple threads.

    itertools.count() is implemented in C, so advancing it with next() is a
    single atomic operation under the GIL — no explicit lock is needed.
    """

    def __init__(self):
        self._counter = itertools.count()
        # Consume the initial 0 so the first increment() returns 1.
        next(self._counter)

    def increment(self, increment=1):
        """Advance the counter by ``increment`` (must be > 0) and return the new value."""
        assert increment > 0
        value = None
        for _ in range(increment):
            value = next(self._counter)
        return value
|
||||
@@ -21,10 +21,22 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
|
||||
# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
|
||||
|
||||
print('========================================================================================')
|
||||
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
|
||||
print('========================================================================================')
|
||||
import sys
|
||||
import csv
|
||||
|
||||
export_receipts_and_logs()
|
||||
|
||||
def set_max_field_size_limit():
    """Raise the csv module's field size limit as high as the platform allows.

    Works around "field larger than field limit (131072)" errors when reading
    CSV rows with very large cells. Starts at sys.maxsize and keeps dividing
    by 10 until csv.field_size_limit() accepts the value without overflowing.
    """
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
        except OverflowError:
            # Too large for the underlying C long on this platform; shrink and retry.
            limit = int(limit / 10)
        else:
            return
|
||||
213
blockchainetl/exporters.py
Normal file
213
blockchainetl/exporters.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright (c) Scrapy developers.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of Scrapy nor the names of its contributors may be used
|
||||
# to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Item Exporters are used to export/serialize items into different formats.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import threading
|
||||
from json import JSONEncoder
|
||||
|
||||
import decimal
|
||||
import six
|
||||
|
||||
|
||||
class BaseItemExporter(object):
    """Common machinery shared by all item exporters.

    Parses the standard options (encoding, fields_to_export,
    export_empty_fields, indent) and provides field serialization;
    subclasses implement export_item().
    """

    def __init__(self, **kwargs):
        self._configure(kwargs)

    def _configure(self, options, dont_fail=False):
        """Pop known options from ``options``.

        When ``dont_fail`` is false, any leftover option raises TypeError
        (useful for subclasses that accept their own keyword arguments).
        """
        self.encoding = options.pop('encoding', None)
        self.fields_to_export = options.pop('fields_to_export', None)
        self.export_empty_fields = options.pop('export_empty_fields', False)
        self.indent = options.pop('indent', None)
        if options and not dont_fail:
            raise TypeError("Unexpected options: %s" % ', '.join(options.keys()))

    def export_item(self, item):
        raise NotImplementedError

    def serialize_field(self, field, name, value):
        # Fall back to the identity function when the field declares no serializer.
        serializer = field.get('serializer', lambda x: x)
        return serializer(value)

    def start_exporting(self):
        pass

    def finish_exporting(self):
        pass

    def _get_serialized_fields(self, item, default_value=None, include_empty=None):
        """Yield (name, serialized_value) pairs for the fields to export."""
        if include_empty is None:
            include_empty = self.export_empty_fields

        if self.fields_to_export is None:
            if include_empty and not isinstance(item, dict):
                field_iter = six.iterkeys(item.fields)
            else:
                field_iter = six.iterkeys(item)
        elif include_empty:
            field_iter = self.fields_to_export
        else:
            field_iter = (f for f in self.fields_to_export if f in item)

        for field_name in field_iter:
            if field_name in item:
                field = {} if isinstance(item, dict) else item.fields[field_name]
                yield field_name, self.serialize_field(field, field_name, item[field_name])
            else:
                yield field_name, default_value
|
||||
|
||||
|
||||
class CsvItemExporter(BaseItemExporter):
    """Export items as CSV rows, emitting a header line before the first item.

    Header writing is thread-safe via double-checked locking; list/tuple
    field values are joined into a single cell with ``join_multivalued``.
    """

    def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
        self._configure(kwargs, dont_fail=True)
        if not self.encoding:
            self.encoding = 'utf-8'
        self.include_headers_line = include_headers_line
        if six.PY3:
            # Wrap the binary stream in a text layer; write_through avoids
            # buffering rows inside the wrapper.
            self.stream = io.TextIOWrapper(
                file,
                line_buffering=False,
                write_through=True,
                encoding=self.encoding
            )
        else:
            self.stream = file
        self.csv_writer = csv.writer(self.stream, **kwargs)
        self._headers_not_written = True
        self._join_multivalued = join_multivalued
        self._write_headers_lock = threading.Lock()

    def serialize_field(self, field, name, value):
        serializer = field.get('serializer', self._join_if_needed)
        return serializer(value)

    def _join_if_needed(self, value):
        """Join list/tuple values into one delimited string; pass others through."""
        if not isinstance(value, (list, tuple)):
            return value
        try:
            return self._join_multivalued.join(str(x) for x in value)
        except TypeError:  # elements may not be joinable into a string
            return value

    def export_item(self, item):
        # Double-checked locking (safe in Python because of the GIL):
        # https://en.wikipedia.org/wiki/Double-checked_locking
        if self._headers_not_written:
            with self._write_headers_lock:
                if self._headers_not_written:
                    self._write_headers_and_set_fields_to_export(item)
                    self._headers_not_written = False

        serialized = self._get_serialized_fields(item, default_value='',
                                                 include_empty=True)
        row = list(self._build_row(value for _, value in serialized))
        self.csv_writer.writerow(row)

    def _build_row(self, values):
        for value in values:
            try:
                yield to_native_str(value, self.encoding)
            except TypeError:
                yield value

    def _write_headers_and_set_fields_to_export(self, item):
        if not self.include_headers_line:
            return
        if not self.fields_to_export:
            if isinstance(item, dict):
                # For dicts, derive the columns from the first exported item.
                self.fields_to_export = list(item.keys())
            else:
                # Otherwise use the fields declared on the Item class.
                self.fields_to_export = list(item.fields.keys())
        self.csv_writer.writerow(list(self._build_row(self.fields_to_export)))
|
||||
|
||||
def EncodeDecimal(o):
    """JSON ``default`` hook: render a Decimal as a float rounded to 8 places."""
    if not isinstance(o, decimal.Decimal):
        raise TypeError(repr(o) + " is not JSON serializable")
    return float(round(o, 8))
|
||||
|
||||
class JsonLinesItemExporter(BaseItemExporter):
    """Export each item as one JSON object per line (JSON Lines format)."""

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file
        # Only force ASCII escaping when no explicit encoding was requested.
        kwargs.setdefault('ensure_ascii', not self.encoding)
        # Decimals are serialized via EncodeDecimal (float rounded to 8 places).
        self.encoder = JSONEncoder(default=EncodeDecimal, **kwargs)

    def export_item(self, item):
        item_dict = dict(self._get_serialized_fields(item))
        line = self.encoder.encode(item_dict) + '\n'
        self.file.write(to_bytes(line, self.encoding))
|
||||
|
||||
|
||||
def to_native_str(text, encoding=None, errors='strict'):
    """Return the native ``str`` for ``text``.

    That is bytes on Python 2 and unicode on Python 3.
    """
    if six.PY2:
        return to_bytes(text, encoding, errors)
    return to_unicode(text, encoding, errors)
|
||||
|
||||
|
||||
def to_bytes(text, encoding=None, errors='strict'):
    """Return ``text`` encoded to bytes (UTF-8 by default).

    A bytes input is returned unchanged; non-string input raises TypeError.
    """
    if isinstance(text, bytes):
        return text
    if not isinstance(text, six.string_types):
        raise TypeError('to_bytes must receive a unicode, str or bytes '
                        'object, got %s' % type(text).__name__)
    if encoding is None:
        encoding = 'utf-8'
    return text.encode(encoding, errors)
|
||||
|
||||
|
||||
def to_unicode(text, encoding=None, errors='strict'):
    """Return ``text`` decoded to unicode (UTF-8 by default).

    A unicode input is returned unchanged; non bytes/str input raises TypeError.
    """
    if isinstance(text, six.text_type):
        return text
    if not isinstance(text, bytes):
        raise TypeError('to_unicode must receive a bytes, str or unicode '
                        'object, got %s' % type(text).__name__)
    if encoding is None:
        encoding = 'utf-8'
    return text.decode(encoding, errors)
|
||||
0
blockchainetl/jobs/__init__.py
Normal file
0
blockchainetl/jobs/__init__.py
Normal file
0
blockchainetl/jobs/exporters/__init__.py
Normal file
0
blockchainetl/jobs/exporters/__init__.py
Normal file
@@ -21,15 +21,15 @@
|
||||
# SOFTWARE.
|
||||
import logging
|
||||
|
||||
from ethereumetl.atomic_counter import AtomicCounter
|
||||
from ethereumetl.exporters import CsvItemExporter, JsonLinesItemExporter
|
||||
from ethereumetl.file_utils import get_file_handle, close_silently
|
||||
from blockchainetl.atomic_counter import AtomicCounter
|
||||
from blockchainetl.exporters import CsvItemExporter, JsonLinesItemExporter
|
||||
from blockchainetl.file_utils import get_file_handle, close_silently
|
||||
|
||||
|
||||
class CompositeItemExporter:
|
||||
def __init__(self, filename_mapping, field_mapping):
|
||||
def __init__(self, filename_mapping, field_mapping=None):
|
||||
self.filename_mapping = filename_mapping
|
||||
self.field_mapping = field_mapping
|
||||
self.field_mapping = field_mapping or {}
|
||||
|
||||
self.file_mapping = {}
|
||||
self.exporter_mapping = {}
|
||||
@@ -40,7 +40,7 @@ class CompositeItemExporter:
|
||||
def open(self):
|
||||
for item_type, filename in self.filename_mapping.items():
|
||||
file = get_file_handle(filename, binary=True)
|
||||
fields = self.field_mapping[item_type]
|
||||
fields = self.field_mapping.get(item_type)
|
||||
self.file_mapping[item_type] = file
|
||||
if str(filename).endswith('.json'):
|
||||
item_exporter = JsonLinesItemExporter(file, fields_to_export=fields)
|
||||
@@ -50,12 +50,16 @@ class CompositeItemExporter:
|
||||
|
||||
self.counter_mapping[item_type] = AtomicCounter()
|
||||
|
||||
def export_items(self, items):
|
||||
for item in items:
|
||||
self.export_item(item)
|
||||
|
||||
def export_item(self, item):
|
||||
item_type = item.get('type')
|
||||
if item_type is None:
|
||||
raise ValueError('type key is not found in item {}'.format(repr(item)))
|
||||
raise ValueError('"type" key is not found in item {}'.format(repr(item)))
|
||||
|
||||
exporter = self.exporter_mapping[item_type]
|
||||
exporter = self.exporter_mapping.get(item_type)
|
||||
if exporter is None:
|
||||
raise ValueError('Exporter for item type {} not found'.format(item_type))
|
||||
exporter.export_item(item)
|
||||
@@ -20,11 +20,19 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
|
||||
from ethereumetl.cli.filter_items import filter_items
|
||||
|
||||
print('========================================================================================')
|
||||
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
|
||||
print('========================================================================================')
|
||||
class ConsoleItemExporter:
    """Item exporter that prints every item to stdout as a JSON line."""

    def open(self):
        pass

    def export_items(self, items):
        for entry in items:
            self.export_item(entry)

    def export_item(self, item):
        print(json.dumps(item))

    def close(self):
        pass
|
||||
@@ -0,0 +1,42 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class CompositeItemConverter:
    """Apply a sequence of item converters in order.

    Each converter receives the previous converter's output; with no
    converters the item is returned unchanged.
    """

    def __init__(self, converters=()):
        self.converters = converters

    def convert_item(self, item):
        result = item
        for converter in self.converters:
            result = converter.convert_item(result)
        return result
||||
@@ -0,0 +1,47 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
#
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
|
||||
# Large ints are not handled correctly by pg8000 so we use Decimal instead:
|
||||
# https://github.com/mfenniak/pg8000/blob/412eace074514ada824e7a102765e37e2cda8eaa/pg8000/core.py#L1703
|
||||
class IntToDecimalItemConverter(SimpleItemConverter):
    """Convert int field values to Decimal.

    Large ints are not handled correctly by pg8000, so ints are shipped to
    Postgres as Decimal instead:
    https://github.com/mfenniak/pg8000/blob/412eace074514ada824e7a102765e37e2cda8eaa/pg8000/core.py#L1703
    """

    def convert_field(self, key, value):
        return Decimal(value) if isinstance(value, int) else value
|
||||
@@ -0,0 +1,56 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class ListFieldItemConverter:
    """Flatten a list-valued field into numbered scalar fields.

    For example ``{'topics': [a, b]}`` with prefix ``'topics'`` becomes
    ``{'topics0': a, 'topics1': b}``. When ``fill`` is given, positions up to
    ``fill`` are padded with ``fill_with`` so every item has the same columns.
    """

    def __init__(self, field, new_field_prefix, fill=0, fill_with=None):
        self.field = field
        self.new_field_prefix = new_field_prefix
        self.fill = fill
        self.fill_with = fill_with

    def convert_item(self, item):
        if not item:
            return item

        value = item.get(self.field)
        if not isinstance(value, list):
            # Field absent or not a list: leave the item untouched.
            return item

        converted = {k: v for k, v in item.items() if k != self.field}
        for index, element in enumerate(value):
            converted[self.new_field_prefix + str(index)] = element
        # Pad up to self.fill entries so downstream schemas stay fixed-width.
        for index in range(len(value), self.fill):
            converted[self.new_field_prefix + str(index)] = self.fill_with
        return converted
|
||||
@@ -0,0 +1,45 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class SimpleItemConverter:
    """Base converter that maps every field value through convert_field().

    Subclasses override convert_field(); the default is the identity, so the
    base class returns an equal item.
    """

    def __init__(self, converters=()):
        self.converters = converters

    def convert_item(self, item):
        return {name: self.convert_field(name, value) for name, value in item.items()}

    def convert_field(self, key, value):
        return value
|
||||
@@ -1,6 +1,6 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -20,11 +20,22 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from ethereumetl.cli.export_geth_traces import export_geth_traces
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
|
||||
print('========================================================================================')
|
||||
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
|
||||
print('========================================================================================')
|
||||
|
||||
export_geth_traces()
|
||||
class UnixTimestampItemConverter(SimpleItemConverter):
    """Convert every ``*timestamp`` field from a unix epoch int to a
    ``YYYY-MM-DD HH:MM:SS`` string (see to_timestamp)."""

    def convert_field(self, key, value):
        if key is not None and key.endswith('timestamp'):
            return to_timestamp(value)
        return value
|
||||
|
||||
|
||||
def to_timestamp(value):
    """Format a unix epoch int as a UTC 'YYYY-MM-DD HH:MM:SS' string.

    Non-int values are returned unchanged.
    """
    if isinstance(value, int):
        # datetime.utcfromtimestamp is deprecated since Python 3.12; an
        # aware UTC datetime formats to the exact same string.
        from datetime import timezone
        return datetime.fromtimestamp(value, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    return value
|
||||
94
blockchainetl/jobs/exporters/google_pubsub_item_exporter.py
Normal file
94
blockchainetl/jobs/exporters/google_pubsub_item_exporter.py
Normal file
@@ -0,0 +1,94 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from google.cloud import pubsub_v1
|
||||
from timeout_decorator import timeout_decorator
|
||||
|
||||
|
||||
class GooglePubSubItemExporter:
    """Publish items to Google Pub/Sub, routing by item type.

    ``item_type_to_topic_mapping`` maps item type -> topic path; item fields
    listed in ``message_attributes`` are attached as message attributes.
    """

    def __init__(self, item_type_to_topic_mapping, message_attributes=('item_id',)):
        self.item_type_to_topic_mapping = item_type_to_topic_mapping
        self.publisher = create_publisher()
        self.message_attributes = message_attributes

    def open(self):
        pass

    def export_items(self, items):
        try:
            self._export_items_with_timeout(items)
        except timeout_decorator.TimeoutError as e:
            # Work around a Pub/Sub publisher bug that stalls the client after
            # running for some time:
            #   Exception in thread Thread-CommitBatchPublisher:
            #   details = "channel is in state TRANSIENT_FAILURE"
            # https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606
            logging.info('Recreating Pub/Sub publisher.')
            self.publisher = create_publisher()
            raise e

    @timeout_decorator.timeout(300)
    def _export_items_with_timeout(self, items):
        futures = [self.export_item(item) for item in items]
        for future in futures:
            # result() blocks until the message is published.
            future.result()

    def export_item(self, item):
        item_type = item.get('type')
        if item_type is None or item_type not in self.item_type_to_topic_mapping:
            logging.warning('Topic for item type "{}" is not configured.'.format(item_type))
            return None
        topic_path = self.item_type_to_topic_mapping.get(item_type)
        payload = json.dumps(item).encode('utf-8')
        return self.publisher.publish(topic_path, data=payload, **self.get_message_attributes(item))

    def get_message_attributes(self, item):
        """Collect the configured attributes present (non-None) on the item."""
        return {
            name: item.get(name)
            for name in self.message_attributes
            if item.get(name) is not None
        }

    def close(self):
        pass
|
||||
|
||||
|
||||
def create_publisher():
    """Build a Pub/Sub publisher client with batching tuned for streaming.

    Batches are flushed once 5 KB, 1 second, or 1000 messages accumulate,
    whichever comes first.
    """
    return pubsub_v1.PublisherClient(
        pubsub_v1.types.BatchSettings(
            max_bytes=1024 * 5,  # 5 kilobytes
            max_latency=1,  # 1 second
            max_messages=1000,
        )
    )
|
||||
@@ -21,10 +21,24 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli.export_token_transfers import export_token_transfers
|
||||
class InMemoryItemExporter:
    """Item exporter that collects items in memory, bucketed by item type."""

    def __init__(self, item_types):
        self.item_types = item_types
        self.items = {}

    def open(self):
        # Start each configured type with an empty bucket.
        self.items = {item_type: [] for item_type in self.item_types}

    def export_item(self, item):
        item_type = item.get('type', None)
        if item_type is None:
            raise ValueError('type key is not found in item {}'.format(repr(item)))
        self.items[item_type].append(item)

    def close(self):
        pass

    def get_items(self, item_type):
        return self.items[item_type]
|
||||
70
blockchainetl/jobs/exporters/postgres_item_exporter.py
Normal file
70
blockchainetl/jobs/exporters/postgres_item_exporter.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import collections
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
|
||||
|
||||
|
||||
class PostgresItemExporter:
    """Export items into Postgres using per-item-type INSERT statements.

    Parameters:
        connection_url: SQLAlchemy connection URL.
        item_type_to_insert_stmt_mapping: dict of item type -> insert statement.
        converters: item converters applied to each item before insert.
        print_sql: when True, SQLAlchemy echoes the generated SQL.
    """

    def __init__(self, connection_url, item_type_to_insert_stmt_mapping, converters=(), print_sql=True):
        self.connection_url = connection_url
        self.item_type_to_insert_stmt_mapping = item_type_to_insert_stmt_mapping
        self.converter = CompositeItemConverter(converters)
        self.print_sql = print_sql

        self.engine = self.create_engine()

    def open(self):
        pass

    def export_items(self, items):
        items_grouped_by_type = group_by_item_type(items)

        for item_type, insert_stmt in self.item_type_to_insert_stmt_mapping.items():
            item_group = items_grouped_by_type.get(item_type)
            if item_group:
                converted_items = list(self.convert_items(item_group))
                # Context manager returns the connection to the pool even if
                # execute() raises; the previous code leaked one connection
                # per item type on every call.
                with self.engine.connect() as connection:
                    connection.execute(insert_stmt, converted_items)

    def convert_items(self, items):
        for item in items:
            yield self.converter.convert_item(item)

    def create_engine(self):
        # pool_recycle guards against the server closing idle connections.
        engine = create_engine(self.connection_url, echo=self.print_sql, pool_recycle=3600)
        return engine

    def close(self):
        pass
|
||||
|
||||
|
||||
def group_by_item_type(items):
    """Bucket items into a mapping keyed by each item's 'type' field.

    Items missing the 'type' key are grouped under None.
    """
    grouped = collections.defaultdict(list)
    for entry in items:
        grouped[entry.get('type')].append(entry)
    return grouped
|
||||
9
blockchainetl/logging_utils.py
Normal file
9
blockchainetl/logging_utils.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import logging
|
||||
|
||||
|
||||
def logging_basic_config(filename=None):
    """Configure root logging at INFO level with a timestamped format.

    Args:
        filename: optional path of a log file; when None, logs go to the
            default stream (stderr).
    """
    # Renamed from `format` to avoid shadowing the builtin.
    log_format = '%(asctime)s - %(name)s [%(levelname)s] - %(message)s'
    # Only pass `filename` through when it is set, so the default stream
    # handler is used otherwise; avoids duplicating the basicConfig call.
    kwargs = {} if filename is None else {'filename': filename}
    logging.basicConfig(level=logging.INFO, format=log_format, **kwargs)
|
||||
@@ -21,10 +21,3 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli.export_tokens import export_tokens

# Deprecation shim: warn the user on stdout, then delegate to the
# `export_tokens` CLI command from the ethereumetl package.
print('========================================================================================')
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
print('========================================================================================')

export_tokens()
|
||||
16
blockchainetl/streaming/postgres_utils.py
Normal file
16
blockchainetl/streaming/postgres_utils.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
|
||||
def create_insert_statement_for_table(table):
    """Build a Postgres upsert statement for the given table.

    When the table defines a primary key, conflicts on it are resolved by
    updating every non-key column with the incoming values; otherwise a
    plain INSERT is returned.
    """
    statement = insert(table)

    key_columns = [col.name for col in table.columns if col.primary_key]
    if key_columns:
        update_values = {
            col.name: statement.excluded[col.name]
            for col in table.columns
            if not col.primary_key
        }
        statement = statement.on_conflict_do_update(
            index_elements=key_columns,
            set_=update_values,
        )

    return statement
|
||||
139
blockchainetl/streaming/streamer.py
Normal file
139
blockchainetl/streaming/streamer.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
from blockchainetl.streaming.streamer_adapter_stub import StreamerAdapterStub
|
||||
from blockchainetl.file_utils import smart_open
|
||||
|
||||
|
||||
class Streamer:
    """Polls the blockchain and exports new blocks in batches.

    Progress is persisted to ``last_synced_block_file`` so streaming can
    resume where it left off after a restart.
    """

    def __init__(
            self,
            blockchain_streamer_adapter=StreamerAdapterStub(),
            last_synced_block_file='last_synced_block.txt',
            lag=0,
            start_block=None,
            end_block=None,
            period_seconds=10,
            block_batch_size=10,
            retry_errors=True,
            pid_file=None):
        """
        Args:
            blockchain_streamer_adapter: adapter implementing open/close,
                get_current_block_number and export_all.
            last_synced_block_file: path of the file persisting progress.
            lag: number of blocks to stay behind the chain head (a simple
                guard against chain reorganizations).
            start_block: if given, initialize the progress file to start
                from this block; the progress file must not already exist.
            end_block: stop streaming once this block is synced.
            period_seconds: sleep time between polls when nothing to sync.
            block_batch_size: maximum number of blocks per sync cycle.
            retry_errors: when True, log sync errors and keep retrying;
                when False, re-raise them.
            pid_file: optional file to hold the process id while running.
        """
        self.blockchain_streamer_adapter = blockchain_streamer_adapter
        self.last_synced_block_file = last_synced_block_file
        self.lag = lag
        self.start_block = start_block
        self.end_block = end_block
        self.period_seconds = period_seconds
        self.block_batch_size = block_batch_size
        self.retry_errors = retry_errors
        self.pid_file = pid_file

        if self.start_block is not None or not os.path.isfile(self.last_synced_block_file):
            init_last_synced_block_file((self.start_block or 0) - 1, self.last_synced_block_file)

        self.last_synced_block = read_last_synced_block(self.last_synced_block_file)

    def stream(self):
        """Run the streaming loop, managing the pid file and adapter lifecycle."""
        try:
            if self.pid_file is not None:
                logging.info('Creating pid file {}'.format(self.pid_file))
                write_to_file(self.pid_file, str(os.getpid()))
            self.blockchain_streamer_adapter.open()
            self._do_stream()
        finally:
            self.blockchain_streamer_adapter.close()
            if self.pid_file is not None:
                logging.info('Deleting pid file {}'.format(self.pid_file))
                delete_file(self.pid_file)

    def _do_stream(self):
        # Loop until end_block is reached (forever when end_block is None).
        # The original `while True and (...)` was redundant.
        while self.end_block is None or self.last_synced_block < self.end_block:
            synced_blocks = 0

            try:
                synced_blocks = self._sync_cycle()
            except Exception:
                # https://stackoverflow.com/a/4992124/1580227
                logging.exception('An exception occurred while syncing block data.')
                if not self.retry_errors:
                    # Bare raise preserves the original traceback.
                    raise

            if synced_blocks <= 0:
                logging.info('Nothing to sync. Sleeping for {} seconds...'.format(self.period_seconds))
                time.sleep(self.period_seconds)

    def _sync_cycle(self):
        """Sync one batch of blocks; return the number of blocks synced."""
        current_block = self.blockchain_streamer_adapter.get_current_block_number()

        target_block = self._calculate_target_block(current_block, self.last_synced_block)
        blocks_to_sync = max(target_block - self.last_synced_block, 0)

        logging.info('Current block {}, target block {}, last synced block {}, blocks to sync {}'.format(
            current_block, target_block, self.last_synced_block, blocks_to_sync))

        if blocks_to_sync != 0:
            self.blockchain_streamer_adapter.export_all(self.last_synced_block + 1, target_block)
            logging.info('Writing last synced block {}'.format(target_block))
            # Persist progress only after a successful export.
            write_last_synced_block(self.last_synced_block_file, target_block)
            self.last_synced_block = target_block

        return blocks_to_sync

    def _calculate_target_block(self, current_block, last_synced_block):
        """Return the highest block to sync to, bounded by lag, batch size and end_block."""
        target_block = current_block - self.lag
        target_block = min(target_block, last_synced_block + self.block_batch_size)
        target_block = min(target_block, self.end_block) if self.end_block is not None else target_block
        return target_block
|
||||
|
||||
|
||||
def delete_file(file):
    """Delete *file* if possible; OS-level failures (e.g. a missing file) are ignored."""
    try:
        os.unlink(file)
    except OSError:
        pass
|
||||
|
||||
|
||||
def write_last_synced_block(file, last_synced_block):
    """Persist the last synced block number to *file*, newline-terminated."""
    content = '{}\n'.format(last_synced_block)
    write_to_file(file, content)
|
||||
|
||||
|
||||
def init_last_synced_block_file(start_block, last_synced_block_file):
    """Create the progress file seeded with *start_block*.

    Raises:
        ValueError: if the file already exists, to avoid silently clobbering
            existing sync progress when --start-block is passed.
    """
    if os.path.isfile(last_synced_block_file):
        message = (
            '{} should not exist if --start-block option is specified. '
            'Either remove the {} file or the --start-block option.'
        ).format(last_synced_block_file, last_synced_block_file)
        raise ValueError(message)
    write_last_synced_block(last_synced_block_file, start_block)
|
||||
|
||||
|
||||
def read_last_synced_block(file):
    """Return the block number stored in *file* as an int."""
    with smart_open(file, 'r') as handle:
        return int(handle.read())
|
||||
|
||||
|
||||
def write_to_file(file, content):
    """Write *content* to *file*, replacing any existing contents."""
    with smart_open(file, 'w') as handle:
        handle.write(content)
|
||||
13
blockchainetl/streaming/streamer_adapter_stub.py
Normal file
13
blockchainetl/streaming/streamer_adapter_stub.py
Normal file
@@ -0,0 +1,13 @@
|
||||
class StreamerAdapterStub:
    """No-op adapter: every operation does nothing and block 0 is reported as current."""

    def open(self):
        """Do nothing."""

    def get_current_block_number(self):
        """Always report block 0."""
        return 0

    def export_all(self, start_block, end_block):
        """Do nothing."""

    def close(self):
        """Do nothing."""
|
||||
19
blockchainetl/streaming/streaming_utils.py
Normal file
19
blockchainetl/streaming/streaming_utils.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
|
||||
def configure_signals():
    """Install a SIGTERM handler that exits cleanly by raising SystemExit(0)."""
    def handle_sigterm(signum, frame):
        # Convert SIGTERM into the normal Python shutdown path.
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_sigterm)
|
||||
|
||||
|
||||
def configure_logging(filename):
    """Drop any existing root handlers, then apply the project's basic log config."""
    while logging.root.handlers:
        logging.root.removeHandler(logging.root.handlers[0])
    logging_basic_config(filename=filename)
|
||||
42
docs/amazon-athena.md
Normal file
42
docs/amazon-athena.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Amazon Athena
|
||||
|
||||
## Querying in Amazon Athena
|
||||
|
||||
- Upload the files to S3:
|
||||
|
||||
```bash
|
||||
> cd output
|
||||
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
|
||||
```
|
||||
|
||||
- Sign in to Athena https://console.aws.amazon.com/athena/home
|
||||
|
||||
- Create a database:
|
||||
|
||||
```sql
|
||||
CREATE DATABASE ethereumetl;
|
||||
```
|
||||
|
||||
- Create the tables:
|
||||
- blocks: [schemas/aws/blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/blocks.sql)
|
||||
- transactions: [schemas/aws/transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/transactions.sql)
|
||||
- token_transfers: [schemas/aws/token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/token_transfers.sql)
|
||||
- contracts: [schemas/aws/contracts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/contracts.sql)
|
||||
- receipts: [schemas/aws/receipts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/receipts.sql)
|
||||
- logs: [schemas/aws/logs.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/logs.sql)
|
||||
- tokens: [schemas/aws/tokens.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/tokens.sql)
|
||||
|
||||
## Airflow DAGs
|
||||
|
||||
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
|
||||
|
||||
## Tables for Parquet Files
|
||||
|
||||
Read [this article](https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30) on how to convert CSVs to Parquet.
|
||||
|
||||
- Create the tables:
|
||||
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_blocks.sql)
|
||||
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_transactions.sql)
|
||||
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_token_transfers.sql)
|
||||
|
||||
Note that [DECIMAL type is limited to 38 digits in Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal) so values greater than 38 decimals will be null.
|
||||
238
docs/commands.md
Normal file
238
docs/commands.md
Normal file
@@ -0,0 +1,238 @@
|
||||
# Commands
|
||||
|
||||
All the commands accept `-h` parameter for help, e.g.:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions -h
|
||||
|
||||
Usage: ethereumetl export_blocks_and_transactions [OPTIONS]
|
||||
|
||||
Export blocks and transactions.
|
||||
|
||||
Options:
|
||||
-s, --start-block INTEGER Start block
|
||||
-e, --end-block INTEGER End block [required]
|
||||
-b, --batch-size INTEGER The number of blocks to export at a time.
|
||||
-p, --provider-uri TEXT The URI of the web3 provider e.g.
|
||||
file://$HOME/Library/Ethereum/geth.ipc or
|
||||
https://mainnet.infura.io
|
||||
-w, --max-workers INTEGER The maximum number of workers.
|
||||
--blocks-output TEXT The output file for blocks. If not provided
|
||||
blocks will not be exported. Use "-" for stdout
|
||||
--transactions-output TEXT The output file for transactions. If not
|
||||
provided transactions will not be exported. Use
|
||||
"-" for stdout
|
||||
-h, --help Show this message and exit.
|
||||
```
|
||||
|
||||
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
|
||||
|
||||
#### export_blocks_and_transactions
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc \
|
||||
--blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Blocks and transactions schema](schema.md#blockscsv).
|
||||
|
||||
#### export_token_transfers
|
||||
|
||||
The API used in this command is not supported by Infura, so you will need a local node.
|
||||
If you want to use Infura for exporting ERC20 transfers refer to [extract_token_transfers](#extract_token_transfers)
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
|
||||
```
|
||||
|
||||
Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
|
||||
--tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 --tokens 0x06012c8cf97bead5deae237070f9587f8e7a266d
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Token transfers schema](schema.md#token_transferscsv).
|
||||
|
||||
#### export_receipts_and_logs
|
||||
|
||||
First extract transaction hashes from `transactions.csv`
|
||||
(Exported with [export_blocks_and_transactions](#export_blocks_and_transactions)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input transactions.csv --column hash --output transaction_hashes.txt
|
||||
```
|
||||
|
||||
Then export receipts and logs:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
|
||||
```
|
||||
|
||||
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
Upvote this feature request https://github.com/paritytech/parity/issues/9075,
|
||||
it will make receipts and logs export much faster.
|
||||
|
||||
[Receipts and logs schema](schema.md#receiptscsv).
|
||||
|
||||
#### extract_token_transfers
|
||||
|
||||
First export receipt logs with [export_receipts_and_logs](#export_receipts_and_logs).
|
||||
|
||||
Then extract transfers from the logs.csv file:
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_token_transfers --logs logs.csv --output token_transfers.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Token transfers schema](schema.md#token_transferscsv).
|
||||
|
||||
#### export_contracts
|
||||
|
||||
First extract contract addresses from `receipts.csv`
|
||||
(Exported with [export_receipts_and_logs](#export_receipts_and_logs)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input receipts.csv --column contract_address --output contract_addresses.txt
|
||||
```
|
||||
|
||||
Then export contracts:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_contracts --contract-addresses contract_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Contracts schema](schema.md#contractscsv).
|
||||
|
||||
#### export_tokens
|
||||
|
||||
First extract token addresses from `contracts.json`
|
||||
(Exported with [export_contracts](#export_contracts)):
|
||||
|
||||
```bash
|
||||
> ethereumetl filter_items -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
|
||||
ethereumetl extract_field -f address -o token_addresses.txt
|
||||
```
|
||||
|
||||
Then export ERC20 / ERC721 tokens:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_tokens --token-addresses token_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
|
||||
```
|
||||
|
||||
You can tune `--max-workers` for performance.
|
||||
|
||||
[Tokens schema](schema.md#tokenscsv).
|
||||
|
||||
#### export_traces
|
||||
|
||||
Also called internal transactions.
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Parity archive node (`parity --tracing on`).
|
||||
Make sure your node has at least 8GB of memory, or else you will face timeout errors.
|
||||
See [this issue](https://github.com/blockchain-etl/ethereum-etl/issues/137)
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --batch-size 100 --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Traces schema](schema.md#tracescsv).
|
||||
|
||||
#### export_geth_traces
|
||||
|
||||
Read [Differences between geth and parity traces.csv](schema.md#differences-between-geth-and-parity-tracescsv)
|
||||
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --ipcapi debug`).
|
||||
When using rpc, add `--rpc --rpcapi debug` options.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_geth_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output geth_traces.json
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### extract_geth_traces
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_geth_traces --input geth_traces.json --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### get_block_range_for_date
|
||||
|
||||
```bash
|
||||
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io --date 2018-01-01
|
||||
4832686,4838611
|
||||
```
|
||||
|
||||
#### get_keccak_hash
|
||||
|
||||
```bash
|
||||
> ethereumetl get_keccak_hash -i "transfer(address,uint256)"
|
||||
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
|
||||
```
|
||||
|
||||
#### stream
|
||||
|
||||
```bash
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --provider-uri https://mainnet.infura.io --start-block 500000
|
||||
```
|
||||
|
||||
- This command outputs blocks, transactions, logs, token_transfers to the console by default.
|
||||
- Entity types can be specified with the `-e` option,
|
||||
e.g. `-e block,transaction,log,token_transfer,trace,contract,token`.
|
||||
- Use `--output` option to specify the Google Pub/Sub topic or Postgres database where to publish blockchain data,
|
||||
- For Google PubSub: `--output=projects/<your-project>/topics/crypto_ethereum`.
|
||||
Data will be pushed to `projects/<your-project>/topics/crypto_ethereum.blocks`, `projects/<your-project>/topics/crypto_ethereum.transactions` etc. topics.
|
||||
- For Postgres: `--output=postgresql+pg8000://<user>:<password>@<host>:<port>/<database_name>`,
|
||||
e.g. `--output=postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum`.
|
||||
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
|
||||
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
|
||||
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).
|
||||
- The command saves its state to `last_synced_block.txt` file where the last synced block number is saved periodically.
|
||||
- Specify either `--start-block` or `--last-synced-block-file` option. `--last-synced-block-file` should point to the
|
||||
file where the block number, from which to start streaming the blockchain data, is saved.
|
||||
- Use the `--lag` option to specify how many blocks to lag behind the head of the blockchain. It's the simplest way to
|
||||
handle chain reorganizations - they are less likely the further a block from the head.
|
||||
- You can tune `--period-seconds`, `--batch-size`, `--block-batch-size`, `--max-workers` for performance.
|
||||
- Refer to [blockchain-etl-streaming](https://github.com/blockchain-etl/blockchain-etl-streaming) for
|
||||
instructions on deploying it to Kubernetes.
|
||||
|
||||
Stream blockchain data continually to Google Pub/Sub:
|
||||
|
||||
```bash
|
||||
> export GOOGLE_APPLICATION_CREDENTIALS=/path_to_credentials_file.json
|
||||
> ethereumetl stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
|
||||
```
|
||||
|
||||
Stream blockchain data to a Postgres database:
|
||||
|
||||
```bash
|
||||
ethereumetl stream --start-block 500000 --output postgresql+pg8000://<user>:<password>@<host>:5432/<database>
|
||||
```
|
||||
4
docs/contact.md
Normal file
4
docs/contact.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Contact
|
||||
|
||||
- [D5 Discord Server](https://discord.gg/wukrezR)
|
||||
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
11
docs/dockerhub.md
Normal file
11
docs/dockerhub.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Uploading to Docker Hub
|
||||
|
||||
```bash
|
||||
ETHEREUMETL_STREAMING_VERSION=1.4.0
|
||||
docker build -t ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} -f Dockerfile .
|
||||
docker tag ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} blockchainetl/ethereum-etl:${ETHEREUMETL_STREAMING_VERSION}
|
||||
docker push blockchainetl/ethereum-etl:${ETHEREUMETL_STREAMING_VERSION}
|
||||
|
||||
docker tag ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} blockchainetl/ethereum-etl:latest
|
||||
docker push blockchainetl/ethereum-etl:latest
|
||||
```
|
||||
4
docs/ethereum-classic.md
Normal file
4
docs/ethereum-classic.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Ethereum Classic
|
||||
|
||||
For getting ETC csv files, make sure you pass in the `--chain classic` param where it's required for the scripts you want to export.
|
||||
ETC won't run if your `--provider-uri` is Infura. A warning will be printed and the provider-uri will be changed to `https://ethereumclassic.network` instead. For faster performance, run an Ethereum Classic client locally, such as Parity (`parity --chain classic`) or Geth Classic.
|
||||
51
docs/exporting-the-blockchain.md
Normal file
51
docs/exporting-the-blockchain.md
Normal file
@@ -0,0 +1,51 @@
|
||||
## Exporting the Blockchain
|
||||
|
||||
If you'd like to have blockchain data set up and hosted for you, [get in touch with us at D5](https://d5.ai/?ref=ethereumetl).
|
||||
|
||||
1. Install python 3.5.3+ https://www.python.org/downloads/
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
|
||||
For that use `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
|
||||
export the data ~40 times faster, you will need to set up a local Ethereum node:
|
||||
|
||||
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
|
||||
|
||||
1. Start geth.
|
||||
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
|
||||
You can export blocks below `currentBlock`,
|
||||
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
|
||||
and token details; for those you need to wait until the full sync).
|
||||
|
||||
1. Install Ethereum ETL: `> pip3 install ethereum-etl`
|
||||
|
||||
1. Export all:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_all --help
|
||||
> ethereumetl export_all -s 0 -e 5999999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
|
||||
```
|
||||
|
||||
In case `ethereumetl` command is not available in PATH, use `python3 -m ethereumetl` instead.
|
||||
|
||||
The result will be in the `output` subdirectory, partitioned in Hive style:
|
||||
```bash
|
||||
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
|
||||
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
|
||||
...
|
||||
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
|
||||
...
|
||||
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
|
||||
...
|
||||
```
|
||||
|
||||
Should work with geth and parity, on Linux, Mac, Windows.
|
||||
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
|
||||
does not place all of the block or receipt data into the database https://wiki.parity.io/Getting-Synced
|
||||
|
||||
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
|
||||
check out this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
|
||||
|
||||
### Export in 2 Hours
|
||||
|
||||
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
|
||||
Read [this article](https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2) for details.
|
||||
19
docs/google-bigquery.md
Normal file
19
docs/google-bigquery.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Google BigQuery
|
||||
|
||||
## Querying in BigQuery
|
||||
|
||||
If you'd rather not export the blockchain data yourself, we publish all tables as a public dataset in [BigQuery](https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579).
|
||||
|
||||
Data is updated near real-time (~4-minute delay to account for block finality).
|
||||
|
||||
### How to Query Balances for all Ethereum Addresses
|
||||
|
||||
Read [this article](https://medium.com/google-cloud/how-to-query-balances-for-all-ethereum-addresses-in-bigquery-fb594e4034a7).
|
||||
|
||||
### Building Token Recommender in Google Cloud Platform
|
||||
|
||||
Read [this article](https://medium.com/google-cloud/building-token-recommender-in-google-cloud-platform-1be5a54698eb).
|
||||
|
||||
### Awesome BigQuery Views
|
||||
|
||||
https://github.com/blockchain-etl/awesome-bigquery-views
|
||||
24
docs/index.md
Normal file
24
docs/index.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Overview
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
With 700+ stars on GitHub, Ethereum ETL is the most popular open source project for Ethereum data.
|
||||
|
||||
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
|
||||
|
||||
## Features
|
||||
|
||||
Easily export:
|
||||
|
||||
* Blocks
|
||||
* Transactions
|
||||
* ERC20 / ERC721 tokens
|
||||
* Token transfers
|
||||
* Receipts
|
||||
* Logs
|
||||
* Contracts
|
||||
* Internal transactions
|
||||
|
||||
## Projects using Ethereum ETL
|
||||
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
|
||||
* [Nansen by D5](https://d5.ai/?ref=ethereumetl) - Analytics platform for Ethereum
|
||||
15
docs/limitations.md
Normal file
15
docs/limitations.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Limitations
|
||||
|
||||
- In case the contract is a proxy, which forwards all calls to a delegate, interface detection doesn’t work,
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing in the `tokens`
|
||||
table.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also some contracts (EOS) implement these methods but with wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
`cast(value as FLOAT64)` (possible loss of precision) or
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- The contracts that don't implement `decimals()` function but have the
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
9
docs/media.md
Normal file
9
docs/media.md
Normal file
@@ -0,0 +1,9 @@
|
||||
## Ethereum ETL in the Media
|
||||
|
||||
- [A Technical Breakdown Of Google's New Blockchain Search Tools](https://www.forbes.com/sites/michaeldelcastillo/2019/02/05/google-launches-search-for-bitcoin-ethereum-bitcoin-cash-dash-dogecoin-ethereum-classic-litecoin-and-zcash/#394fc868c789)
|
||||
- [Navigating Bitcoin, Ethereum, XRP: How Google Is Quietly Making Blockchains Searchable](https://www.forbes.com/sites/michaeldelcastillo/2019/02/04/navigating-bitcoin-ethereum-xrp-how-google-is-quietly-making-blockchains-searchable/?ss=crypto-blockchain#49e111da4248)
|
||||
- [Ethereum in BigQuery: a Public Dataset for smart contract analytics](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics)
|
||||
- [Ethereum in BigQuery: how we built this dataset](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-how-we-built-dataset)
|
||||
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
|
||||
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
|
||||
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
|
||||
45
docs/quickstart.md
Normal file
45
docs/quickstart.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Quickstart
|
||||
|
||||
Install Ethereum ETL:
|
||||
|
||||
```bash
|
||||
pip3 install ethereum-etl
|
||||
```
|
||||
|
||||
Export blocks and transactions:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
|
||||
```
|
||||
|
||||
Export traces:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
|
||||
```
|
||||
|
||||
Stream blocks, transactions, logs, token_transfers continually to console:
|
||||
|
||||
```bash
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt
|
||||
```
|
||||
|
||||
Find all commands [here](commands.md).
|
||||
|
||||
---
|
||||
|
||||
To run the latest version of Ethereum ETL, check out the repo and call
|
||||
```bash
|
||||
> pip3 install -e .
|
||||
> python3 ethereumetl.py
|
||||
```
|
||||
152
docs/schema.md
Normal file
152
docs/schema.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# Schema
|
||||
|
||||
## blocks.csv
|
||||
|
||||
Column | Type |
|
||||
------------------|--------------------|
|
||||
number | bigint |
|
||||
hash | hex_string |
|
||||
parent_hash | hex_string |
|
||||
nonce | hex_string |
|
||||
sha3_uncles | hex_string |
|
||||
logs_bloom | hex_string |
|
||||
transactions_root | hex_string |
|
||||
state_root | hex_string |
|
||||
receipts_root | hex_string |
|
||||
miner | address |
|
||||
difficulty | numeric |
|
||||
total_difficulty | numeric |
|
||||
size | bigint |
|
||||
extra_data | hex_string |
|
||||
gas_limit | bigint |
|
||||
gas_used | bigint |
|
||||
timestamp | bigint |
|
||||
transaction_count | bigint |
|
||||
|
||||
---
|
||||
|
||||
## transactions.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------|-------------|
|
||||
hash | hex_string |
|
||||
nonce | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
transaction_index| bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
gas | bigint |
|
||||
gas_price | bigint |
|
||||
input | hex_string |
|
||||
block_timestamp | bigint |
|
||||
|
||||
---
|
||||
|
||||
## token_transfers.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
token_address | address |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
transaction_hash | hex_string |
|
||||
log_index | bigint |
|
||||
block_number | bigint |
|
||||
|
||||
---
|
||||
|
||||
## receipts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
cumulative_gas_used | bigint |
|
||||
gas_used | bigint |
|
||||
contract_address | address |
|
||||
root | hex_string |
|
||||
status | bigint |
|
||||
|
||||
---
|
||||
|
||||
## logs.csv
|
||||
|
||||
Column | Type |
|
||||
-------------------------|-------------|
|
||||
log_index | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
address | address |
|
||||
data | hex_string |
|
||||
topics | string |
|
||||
|
||||
---
|
||||
|
||||
## contracts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
bytecode | hex_string |
|
||||
function_sighashes | string |
|
||||
is_erc20 | boolean |
|
||||
is_erc721 | boolean |
|
||||
block_number | bigint |
|
||||
|
||||
---
|
||||
|
||||
## tokens.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
symbol | string |
|
||||
name | string |
|
||||
decimals | bigint |
|
||||
total_supply | numeric |
|
||||
|
||||
---
|
||||
|
||||
## traces.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
block_number | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
input | hex_string |
|
||||
output | hex_string |
|
||||
trace_type | string |
|
||||
call_type | string |
|
||||
reward_type | string |
|
||||
gas | bigint |
|
||||
gas_used | bigint |
|
||||
subtraces | bigint |
|
||||
trace_address | string |
|
||||
error | string |
|
||||
status | bigint |
|
||||
|
||||
### Differences between geth and parity traces.csv
|
||||
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is same as `to_address` of parent call);
|
||||
- geth output doesn't have `reward` traces;
|
||||
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
|
||||
- `error` field contains human readable error message, which might differ in geth/parity output;
|
||||
- geth output doesn't have `transaction_hash`;
|
||||
- `gas_used` is 0 on traces with error in geth, empty in parity;
|
||||
- zero output of subcalls is `0x000...` in geth, `0x` in parity;
|
||||
|
||||
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
|
||||
|
||||
Note: for the `address` type all hex characters are lower-cased.
|
||||
`boolean` type can have 2 values: `True` or `False`.
|
||||
@@ -29,17 +29,21 @@ from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
|
||||
from ethereumetl.cli.export_token_transfers import export_token_transfers
|
||||
from ethereumetl.cli.export_tokens import export_tokens
|
||||
from ethereumetl.cli.export_traces import export_traces
|
||||
from ethereumetl.cli.extract_contracts import extract_contracts
|
||||
from ethereumetl.cli.extract_csv_column import extract_csv_column
|
||||
from ethereumetl.cli.extract_field import extract_field
|
||||
from ethereumetl.cli.extract_geth_traces import extract_geth_traces
|
||||
from ethereumetl.cli.extract_token_transfers import extract_token_transfers
|
||||
from ethereumetl.cli.extract_tokens import extract_tokens
|
||||
from ethereumetl.cli.filter_items import filter_items
|
||||
from ethereumetl.cli.get_block_range_for_date import get_block_range_for_date
|
||||
from ethereumetl.cli.get_block_range_for_timestamps import get_block_range_for_timestamps
|
||||
from ethereumetl.cli.get_keccak_hash import get_keccak_hash
|
||||
from ethereumetl.cli.stream import stream
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(version='1.4.0')
|
||||
@click.pass_context
|
||||
def cli(ctx):
|
||||
pass
|
||||
@@ -56,6 +60,11 @@ cli.add_command(export_tokens, "export_tokens")
|
||||
cli.add_command(export_traces, "export_traces")
|
||||
cli.add_command(export_geth_traces, "export_geth_traces")
|
||||
cli.add_command(extract_geth_traces, "extract_geth_traces")
|
||||
cli.add_command(extract_contracts, "extract_contracts")
|
||||
cli.add_command(extract_tokens, "extract_tokens")
|
||||
|
||||
# streaming
|
||||
cli.add_command(stream, "stream")
|
||||
|
||||
# utils
|
||||
cli.add_command(get_block_range_for_date, "get_block_range_for_date")
|
||||
|
||||
@@ -25,11 +25,16 @@ import click
|
||||
import re
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.jobs.export_all_common import export_all_common
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
def is_date_range(start, end):
|
||||
@@ -74,7 +79,7 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
|
||||
|
||||
while start_date <= end_date:
|
||||
batch_start_block, batch_end_block = eth_service.get_block_range_for_date(start_date)
|
||||
partition_dir = f'/date={str(start_date)}/'
|
||||
partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
|
||||
yield batch_start_block, batch_end_block, partition_dir
|
||||
start_date += day
|
||||
|
||||
@@ -89,7 +94,10 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
|
||||
|
||||
padded_batch_start_block = str(batch_start_block).zfill(8)
|
||||
padded_batch_end_block = str(batch_end_block).zfill(8)
|
||||
partition_dir = f'/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'
|
||||
partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
|
||||
padded_batch_start_block=padded_batch_start_block,
|
||||
padded_batch_end_block=padded_batch_end_block,
|
||||
)
|
||||
yield batch_start_block, batch_end_block, partition_dir
|
||||
|
||||
else:
|
||||
@@ -99,15 +107,18 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-s', '--start', required=True, type=str, help='Start block/ISO date/Unix time')
|
||||
@click.option('-e', '--end', required=True, type=str, help='End block/ISO date/Unix time')
|
||||
@click.option('-b', '--partition-batch-size', default=10000, type=int,
|
||||
@click.option('-b', '--partition-batch-size', default=10000, show_default=True, type=int,
|
||||
help='The number of blocks to export in partition.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-o', '--output-dir', default='output', type=str, help='Output directory, partitioned in Hive style.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-B', '--export-batch-size', default=100, type=int, help='The number of requests in JSON RPC batches.')
|
||||
def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size):
|
||||
"""Exports all for a range of blocks."""
|
||||
@click.option('-o', '--output-dir', default='output', show_default=True, type=str, help='Output directory, partitioned in Hive style.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-B', '--export-batch-size', default=100, show_default=True, type=int, help='The number of requests in JSON RPC batches.')
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size,
|
||||
chain='ethereum'):
|
||||
"""Exports all data for a range of blocks."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
export_all_common(get_partitions(start, end, partition_batch_size, provider_uri),
|
||||
output_dir, provider_uri, max_workers, export_batch_size)
|
||||
|
||||
@@ -25,28 +25,32 @@ import click
|
||||
|
||||
from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
|
||||
from ethereumetl.jobs.exporters.blocks_and_transactions_item_exporter import blocks_and_transactions_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
|
||||
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
|
||||
@click.option('-e', '--end-block', required=True, type=int, help='End block')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to export at a time.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to export at a time.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('--blocks-output', default=None, type=str,
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('--blocks-output', default=None, show_default=True, type=str,
|
||||
help='The output file for blocks. If not provided blocks will not be exported. Use "-" for stdout')
|
||||
@click.option('--transactions-output', default=None, type=str,
|
||||
@click.option('--transactions-output', default=None, show_default=True, type=str,
|
||||
help='The output file for transactions. '
|
||||
'If not provided transactions will not be exported. Use "-" for stdout')
|
||||
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output, transactions_output):
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output,
|
||||
transactions_output, chain='ethereum'):
|
||||
"""Exports blocks and transactions."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
if blocks_output is None and transactions_output is None:
|
||||
raise ValueError('Either --blocks-output or --transactions-output options must be provided')
|
||||
|
||||
|
||||
@@ -23,27 +23,30 @@
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_contracts_job import ExportContractsJob
|
||||
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-c', '--contract-addresses', required=True, type=str,
|
||||
help='The file containing contract addresses, one per line.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri, chain='ethereum'):
|
||||
"""Exports contracts bytecode and sighashes."""
|
||||
check_classic_provider_uri(chain, provider_uri)
|
||||
with smart_open(contract_addresses, 'r') as contract_addresses_file:
|
||||
contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
|
||||
if contract_address.strip())
|
||||
|
||||
@@ -25,7 +25,7 @@ import click
|
||||
|
||||
from ethereumetl.jobs.export_geth_traces_job import ExportGethTracesJob
|
||||
from ethereumetl.jobs.exporters.geth_traces_item_exporter import geth_traces_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
@@ -33,12 +33,12 @@ logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
|
||||
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
|
||||
@click.option('-e', '--end-block', required=True, type=int, help='End block')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to process at a time.')
|
||||
@click.option('-o', '--output', default='-', type=str,
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to process at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str,
|
||||
help='The output file for geth traces. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', required=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
|
||||
|
||||
@@ -23,31 +23,35 @@
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
|
||||
from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of receipts to export at a time.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of receipts to export at a time.')
|
||||
@click.option('-t', '--transaction-hashes', required=True, type=str,
|
||||
help='The file containing transaction hashes, one per line.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('--receipts-output', default=None, type=str,
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('--receipts-output', default=None, show_default=True, type=str,
|
||||
help='The output file for receipts. If not provided receipts will not be exported. Use "-" for stdout')
|
||||
@click.option('--logs-output', default=None, type=str,
|
||||
@click.option('--logs-output', default=None, show_default=True, type=str,
|
||||
help='The output file for receipt logs. '
|
||||
'aIf not provided receipt logs will not be exported. Use "-" for stdout')
|
||||
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output):
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
|
||||
chain='ethereum'):
|
||||
"""Exports receipts and logs."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
|
||||
job = ExportReceiptsJob(
|
||||
transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),
|
||||
|
||||
@@ -27,7 +27,7 @@ from web3 import Web3
|
||||
|
||||
from ethereumetl.jobs.export_token_transfers_job import ExportTokenTransfersJob
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
@@ -35,14 +35,14 @@ logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
|
||||
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
|
||||
@click.option('-e', '--end-block', required=True, type=int, help='End block')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', required=True, type=str,
|
||||
help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
|
||||
@click.option('-t', '--tokens', default=None, type=str, nargs=1, help='The list of token addresses to filter by.')
|
||||
@click.option('-t', '--tokens', default=None, show_default=True, type=str, nargs=1, help='The list of token addresses to filter by.')
|
||||
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
|
||||
"""Exports ERC20/ERC721 transfers."""
|
||||
job = ExportTokenTransfersJob(
|
||||
|
||||
@@ -25,25 +25,29 @@ import click
|
||||
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-t', '--token-addresses', type=str, help='The file containing token addresses, one per line.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-t', '--token-addresses', required=True, type=str,
|
||||
help='The file containing token addresses, one per line.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
def export_tokens(token_addresses, output, max_workers, provider_uri):
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_tokens(token_addresses, output, max_workers, provider_uri, chain='ethereum'):
|
||||
"""Exports ERC20/ERC721 tokens."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
with smart_open(token_addresses, 'r') as token_addresses_file:
|
||||
job = ExportTokensJob(
|
||||
token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
|
||||
|
||||
@@ -26,7 +26,7 @@ import click
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.jobs.export_traces_job import ExportTracesJob
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
|
||||
@@ -35,22 +35,32 @@ logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
|
||||
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
|
||||
@click.option('-e', '--end-block', required=True, type=int, help='End block')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-b', '--batch-size', default=5, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
@click.option('-p', '--provider-uri', required=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/.local/share/io.parity.ethereum/jsonrpc.ipc or http://localhost:8545/')
|
||||
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
|
||||
@click.option('--genesis-traces/--no-genesis-traces', default=False, show_default=True, help='Whether to include genesis traces')
|
||||
@click.option('--daofork-traces/--no-daofork-traces', default=False, show_default=True, help='Whether to include daofork traces')
|
||||
@click.option('-t', '--timeout', default=60, show_default=True, type=int, help='IPC or HTTP request timeout.')
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
|
||||
genesis_traces, daofork_traces, timeout=60, chain='ethereum'):
|
||||
"""Exports traces from parity node."""
|
||||
if chain == 'classic' and daofork_traces == True:
|
||||
raise ValueError(
|
||||
'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.')
|
||||
job = ExportTracesJob(
|
||||
start_block=start_block,
|
||||
end_block=end_block,
|
||||
batch_size=batch_size,
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri, timeout=timeout))),
|
||||
item_exporter=traces_item_exporter(output),
|
||||
max_workers=max_workers)
|
||||
max_workers=max_workers,
|
||||
include_genesis_traces=genesis_traces,
|
||||
include_daofork_traces=daofork_traces)
|
||||
|
||||
job.run()
|
||||
|
||||
58
ethereumetl/cli/extract_contracts.py
Normal file
58
ethereumetl/cli/extract_contracts.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
import click
|
||||
from blockchainetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
|
||||
from ethereumetl.jobs.extract_contracts_job import ExtractContractsJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-t', '--traces', type=str, required=True, help='The CSV file containing traces.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
def extract_contracts(traces, batch_size, output, max_workers):
|
||||
"""Extracts contracts from traces file."""
|
||||
|
||||
set_max_field_size_limit()
|
||||
|
||||
with smart_open(traces, 'r') as traces_file:
|
||||
if traces.endswith('.json'):
|
||||
traces_iterable = (json.loads(line) for line in traces_file)
|
||||
else:
|
||||
traces_iterable = csv.DictReader(traces_file)
|
||||
job = ExtractContractsJob(
|
||||
traces_iterable=traces_iterable,
|
||||
batch_size=batch_size,
|
||||
max_workers=max_workers,
|
||||
item_exporter=contracts_item_exporter(output))
|
||||
|
||||
job.run()
|
||||
@@ -25,15 +25,15 @@ import click
|
||||
import csv
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-c', '--column', required=True, type=str, help='The csv column name to extract.')
|
||||
def extract_csv_column(input, output, column):
|
||||
"""Extracts column from given CSV file."""
|
||||
"""Extracts column from given CSV file. Deprecated - use extract_field."""
|
||||
set_max_field_size_limit()
|
||||
|
||||
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
|
||||
|
||||
@@ -21,21 +21,15 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import json
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl import misc_utils
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-f', '--field', required=True, type=str, help='The field name to extract.')
|
||||
def extract_field(input, output, field):
|
||||
"""Extracts field from given JSON lines file."""
|
||||
# TODO: Add support for CSV
|
||||
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
|
||||
for line in input_file:
|
||||
item = json.loads(line)
|
||||
output_file.write(item[field] + '\n')
|
||||
"""Extracts field from given CSV or JSON newline-delimited file."""
|
||||
misc_utils.extract_field(input, output, field)
|
||||
|
||||
@@ -24,19 +24,19 @@ import json
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
|
||||
from ethereumetl.jobs.extract_geth_traces_job import ExtractGethTracesJob
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input', required=True, type=str, help='The JSON file containing geth traces.')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
def extract_geth_traces(input, batch_size, output, max_workers):
|
||||
"""Extracts geth traces from JSON lines file."""
|
||||
with smart_open(input, 'r') as geth_traces_file:
|
||||
|
||||
@@ -25,19 +25,19 @@ import click
|
||||
import csv
|
||||
import json
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from ethereumetl.jobs.extract_token_transfers_job import ExtractTokenTransfersJob
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-l', '--logs', type=str, required=True, help='The CSV file containing receipt logs.')
|
||||
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
|
||||
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
def extract_token_transfers(logs, batch_size, output, max_workers):
|
||||
"""Extracts ERC20/ERC721 transfers from logs file."""
|
||||
with smart_open(logs, 'r') as logs_file:
|
||||
|
||||
63
ethereumetl/cli/extract_tokens.py
Normal file
63
ethereumetl/cli/extract_tokens.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
import click
|
||||
from blockchainetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.jobs.extract_tokens_job import ExtractTokensJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from web3 import Web3
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-c', '--contracts', type=str, required=True, help='The JSON file containing contracts.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
|
||||
def extract_tokens(contracts, provider_uri, output, max_workers):
|
||||
"""Extracts tokens from contracts file."""
|
||||
|
||||
set_max_field_size_limit()
|
||||
|
||||
with smart_open(contracts, 'r') as contracts_file:
|
||||
if contracts.endswith('.json'):
|
||||
contracts_iterable = (json.loads(line) for line in contracts_file)
|
||||
else:
|
||||
contracts_iterable = csv.DictReader(contracts_file)
|
||||
job = ExtractTokensJob(
|
||||
contracts_iterable=contracts_iterable,
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
|
||||
max_workers=max_workers,
|
||||
item_exporter=tokens_item_exporter(output))
|
||||
|
||||
job.run()
|
||||
@@ -20,24 +20,18 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import json
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl import misc_utils
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-p', '--predicate', required=True, type=str,
|
||||
help='Predicate in Python code e.g. "item[\'is_erc20\']".')
|
||||
def filter_items(input, output, predicate):
|
||||
"""Filters given JSON lines file by predicate."""
|
||||
# TODO: Add support for CSV
|
||||
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
|
||||
for line in input_file:
|
||||
item = json.loads(line)
|
||||
if eval(predicate, globals(), {'item': item}):
|
||||
output_file.write(json.dumps(item) + '\n')
|
||||
"""Filters rows in given CSV or JSON newline-delimited file."""
|
||||
def evaluated_predicate(item):
|
||||
return eval(predicate, globals(), {'item': item})
|
||||
misc_utils.filter_items(input, output, evaluated_predicate)
|
||||
|
||||
@@ -21,32 +21,36 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import click
|
||||
|
||||
from datetime import datetime
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-d', '--date', required=True, type=lambda d: datetime.strptime(d, '%Y-%m-%d'),
|
||||
help='The date e.g. 2018-01-01.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
def get_block_range_for_date(provider_uri, date, output):
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
|
||||
"""Outputs start and end blocks for given date."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
provider = get_provider_from_uri(provider_uri)
|
||||
web3 = Web3(provider)
|
||||
eth_service = EthService(web3)
|
||||
|
||||
start_block, end_block = eth_service.get_block_range_for_date(date)
|
||||
|
||||
with click.open_file(output, 'w') as output_file:
|
||||
with smart_open(output, 'w') as output_file:
|
||||
output_file.write('{},{}\n'.format(start_block, end_block))
|
||||
|
||||
@@ -25,23 +25,26 @@ import click
|
||||
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-s', '--start-timestamp', required=True, type=int, help='Start unix timestamp, in seconds.')
|
||||
@click.option('-e', '--end-timestamp', required=True, type=int, help='End unix timestamp, in seconds.')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp, output):
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
|
||||
def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp, output, chain='ethereum'):
|
||||
"""Outputs start and end blocks for given timestamps."""
|
||||
provider_uri = check_classic_provider_uri(chain, provider_uri)
|
||||
provider = get_provider_from_uri(provider_uri)
|
||||
web3 = Web3(provider)
|
||||
eth_service = EthService(web3)
|
||||
|
||||
@@ -25,14 +25,14 @@ import click
|
||||
|
||||
from eth_utils import keccak
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input-string', default='Transfer(address,address,uint256)', type=str,
|
||||
@click.option('-i', '--input-string', default='Transfer(address,address,uint256)', show_default=True, type=str,
|
||||
help='String to hash, e.g. Transfer(address,address,uint256)')
|
||||
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
def get_keccak_hash(input_string, output):
|
||||
"""Outputs 32-byte Keccak hash of given string."""
|
||||
hash = keccak(text=input_string)
|
||||
|
||||
110
ethereumetl/cli/stream.py
Normal file
110
ethereumetl/cli/stream.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
import logging
|
||||
import random
|
||||
|
||||
import click
|
||||
from blockchainetl.streaming.streaming_utils import configure_signals, configure_logging
|
||||
from ethereumetl.enumeration.entity_type import EntityType
|
||||
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-l', '--last-synced-block-file', default='last_synced_block.txt', show_default=True, type=str, help='')
|
||||
@click.option('--lag', default=0, show_default=True, type=int, help='The number of blocks to lag behind the network.')
|
||||
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
|
||||
help='The URI of the web3 provider e.g. '
|
||||
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
|
||||
@click.option('-o', '--output', type=str,
|
||||
help='Either Google PubSub topic path e.g. projects/your-project/topics/crypto_ethereum; '
|
||||
'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum. '
|
||||
'If not specified will print to console')
|
||||
@click.option('-s', '--start-block', default=None, show_default=True, type=int, help='Start block')
|
||||
@click.option('-e', '--entity-types', default=','.join(EntityType.ALL_FOR_INFURA), show_default=True, type=str,
|
||||
help='The list of entity types to export.')
|
||||
@click.option('--period-seconds', default=10, show_default=True, type=int, help='How many seconds to sleep between syncs')
|
||||
@click.option('-b', '--batch-size', default=10, show_default=True, type=int, help='How many blocks to batch in single request')
|
||||
@click.option('-B', '--block-batch-size', default=1, show_default=True, type=int, help='How many blocks to batch in single sync round')
|
||||
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The number of workers')
|
||||
@click.option('--log-file', default=None, show_default=True, type=str, help='Log file')
|
||||
@click.option('--pid-file', default=None, show_default=True, type=str, help='pid file')
|
||||
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
|
||||
period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
|
||||
"""Streams all data types to console or Google Pub/Sub."""
|
||||
configure_logging(log_file)
|
||||
configure_signals()
|
||||
entity_types = parse_entity_types(entity_types)
|
||||
validate_entity_types(entity_types, output)
|
||||
|
||||
from ethereumetl.streaming.item_exporter_creator import create_item_exporter
|
||||
from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
|
||||
from blockchainetl.streaming.streamer import Streamer
|
||||
|
||||
# TODO: Implement fallback mechanism for provider uris instead of picking randomly
|
||||
provider_uri = pick_random_provider_uri(provider_uri)
|
||||
logging.info('Using ' + provider_uri)
|
||||
|
||||
streamer_adapter = EthStreamerAdapter(
|
||||
batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
|
||||
item_exporter=create_item_exporter(output),
|
||||
batch_size=batch_size,
|
||||
max_workers=max_workers,
|
||||
entity_types=entity_types
|
||||
)
|
||||
streamer = Streamer(
|
||||
blockchain_streamer_adapter=streamer_adapter,
|
||||
last_synced_block_file=last_synced_block_file,
|
||||
lag=lag,
|
||||
start_block=start_block,
|
||||
period_seconds=period_seconds,
|
||||
block_batch_size=block_batch_size,
|
||||
pid_file=pid_file
|
||||
)
|
||||
streamer.stream()
|
||||
|
||||
|
||||
def parse_entity_types(entity_types):
|
||||
entity_types = [c.strip() for c in entity_types.split(',')]
|
||||
|
||||
# validate passed types
|
||||
for entity_type in entity_types:
|
||||
if entity_type not in EntityType.ALL_FOR_STREAMING:
|
||||
raise click.BadOptionUsage(
|
||||
'--entity-type', '{} is not an available entity type. Supply a comma separated list of types from {}'
|
||||
.format(entity_type, ','.join(EntityType.ALL_FOR_STREAMING)))
|
||||
|
||||
return entity_types
|
||||
|
||||
|
||||
def validate_entity_types(entity_types, output):
|
||||
from ethereumetl.streaming.item_exporter_creator import determine_item_exporter_type, ItemExporterType
|
||||
item_exporter_type = determine_item_exporter_type(output)
|
||||
if item_exporter_type == ItemExporterType.POSTGRES \
|
||||
and (EntityType.CONTRACT in entity_types or EntityType.TOKEN in entity_types):
|
||||
raise ValueError('contract and token are not yet supported entity types for postgres item exporter.')
|
||||
|
||||
|
||||
def pick_random_provider_uri(provider_uri):
|
||||
provider_uris = [uri.strip() for uri in provider_uri.split(',')]
|
||||
return random.choice(provider_uris)
|
||||
@@ -28,3 +28,4 @@ class EthContract(object):
|
||||
self.function_sighashes = []
|
||||
self.is_erc20 = False
|
||||
self.is_erc721 = False
|
||||
self.block_number = None
|
||||
|
||||
@@ -28,3 +28,4 @@ class EthToken(object):
|
||||
self.name = None
|
||||
self.decimals = None
|
||||
self.total_supply = None
|
||||
self.block_number = None
|
||||
|
||||
@@ -36,6 +36,8 @@ class EthTrace(object):
|
||||
self.reward_type = None
|
||||
self.gas = None
|
||||
self.gas_used = None
|
||||
self.subtraces = None
|
||||
self.subtraces = 0
|
||||
self.trace_address = None
|
||||
self.error = None
|
||||
self.status = None
|
||||
self.trace_id = None
|
||||
|
||||
0
ethereumetl/enumeration/__init__.py
Normal file
0
ethereumetl/enumeration/__init__.py
Normal file
12
ethereumetl/enumeration/entity_type.py
Normal file
12
ethereumetl/enumeration/entity_type.py
Normal file
@@ -0,0 +1,12 @@
|
||||
class EntityType:
|
||||
BLOCK = 'block'
|
||||
TRANSACTION = 'transaction'
|
||||
RECEIPT = 'receipt'
|
||||
LOG = 'log'
|
||||
TOKEN_TRANSFER = 'token_transfer'
|
||||
TRACE = 'trace'
|
||||
CONTRACT = 'contract'
|
||||
TOKEN = 'token'
|
||||
|
||||
ALL_FOR_STREAMING = [BLOCK, TRANSACTION, LOG, TOKEN_TRANSFER, TRACE, CONTRACT, TOKEN]
|
||||
ALL_FOR_INFURA = [BLOCK, TRANSACTION, LOG, TOKEN_TRANSFER]
|
||||
@@ -20,47 +20,93 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from requests.exceptions import Timeout as RequestsTimeout, HTTPError, TooManyRedirects
|
||||
from web3.utils.threads import Timeout as Web3Timeout
|
||||
|
||||
from ethereumetl.executors.bounded_executor import BoundedExecutor
|
||||
from ethereumetl.executors.fail_safe_executor import FailSafeExecutor
|
||||
from ethereumetl.misc.retriable_value_error import RetriableValueError
|
||||
from ethereumetl.progress_logger import ProgressLogger
|
||||
from ethereumetl.utils import dynamic_batch_iterator
|
||||
|
||||
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError)
|
||||
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError,
|
||||
RetriableValueError)
|
||||
|
||||
BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS = 2 * 60
|
||||
|
||||
|
||||
# Executes the given work in batches, reducing the batch size exponentially in case of errors.
|
||||
class BatchWorkExecutor:
|
||||
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
|
||||
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS, max_retries=5):
|
||||
self.batch_size = starting_batch_size
|
||||
self.max_batch_size = starting_batch_size
|
||||
self.latest_batch_size_change_time = None
|
||||
self.max_workers = max_workers
|
||||
# Using bounded executor prevents unlimited queue growth
|
||||
# and allows monitoring in-progress futures and failing fast in case of errors.
|
||||
self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
|
||||
self.retry_exceptions = retry_exceptions
|
||||
self.max_retries = max_retries
|
||||
self.progress_logger = ProgressLogger()
|
||||
self.logger = logging.getLogger('BatchWorkExecutor')
|
||||
|
||||
def execute(self, work_iterable, work_handler, total_items=None):
|
||||
self.progress_logger.start(total_items=total_items)
|
||||
for batch in dynamic_batch_iterator(work_iterable, lambda: self.batch_size):
|
||||
self.executor.submit(self._fail_safe_execute, work_handler, batch)
|
||||
|
||||
# Check race conditions
|
||||
def _fail_safe_execute(self, work_handler, batch):
|
||||
try:
|
||||
work_handler(batch)
|
||||
self._try_increase_batch_size(len(batch))
|
||||
except self.retry_exceptions:
|
||||
batch_size = self.batch_size
|
||||
# Reduce the batch size. Subsequent batches will be 2 times smaller
|
||||
if batch_size == len(batch) and batch_size > 1:
|
||||
self.batch_size = int(batch_size / 2)
|
||||
# For the failed batch try handling items one by one
|
||||
self.logger.exception('An exception occurred while executing work_handler.')
|
||||
self._try_decrease_batch_size(len(batch))
|
||||
self.logger.info('The batch of size {} will be retried one item at a time.'.format(len(batch)))
|
||||
for item in batch:
|
||||
work_handler([item])
|
||||
execute_with_retries(work_handler, [item],
|
||||
max_retries=self.max_retries, retry_exceptions=self.retry_exceptions)
|
||||
|
||||
self.progress_logger.track(len(batch))
|
||||
|
||||
# Some acceptable race conditions are possible
|
||||
def _try_decrease_batch_size(self, current_batch_size):
|
||||
batch_size = self.batch_size
|
||||
if batch_size == current_batch_size and batch_size > 1:
|
||||
new_batch_size = int(current_batch_size / 2)
|
||||
self.logger.info('Reducing batch size to {}.'.format(new_batch_size))
|
||||
self.batch_size = new_batch_size
|
||||
self.latest_batch_size_change_time = time.time()
|
||||
|
||||
def _try_increase_batch_size(self, current_batch_size):
|
||||
if current_batch_size * 2 <= self.max_batch_size:
|
||||
current_time = time.time()
|
||||
latest_batch_size_change_time = self.latest_batch_size_change_time
|
||||
seconds_since_last_change = current_time - latest_batch_size_change_time \
|
||||
if latest_batch_size_change_time is not None else 0
|
||||
if seconds_since_last_change > BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS:
|
||||
new_batch_size = current_batch_size * 2
|
||||
self.logger.info('Increasing batch size to {}.'.format(new_batch_size))
|
||||
self.batch_size = new_batch_size
|
||||
self.latest_batch_size_change_time = current_time
|
||||
|
||||
def shutdown(self):
|
||||
self.executor.shutdown()
|
||||
self.progress_logger.finish()
|
||||
|
||||
|
||||
def execute_with_retries(func, *args, max_retries=5, retry_exceptions=RETRY_EXCEPTIONS, sleep_seconds=1):
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
return func(*args)
|
||||
except retry_exceptions:
|
||||
logging.exception('An exception occurred while executing execute_with_retries. Retry #{}'.format(i))
|
||||
if i < max_retries - 1:
|
||||
logging.info('The request will be retried after {} seconds. Retry #{}'.format(sleep_seconds, i))
|
||||
time.sleep(sleep_seconds)
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
|
||||
@@ -25,13 +25,10 @@ import csv
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from time import time
|
||||
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
|
||||
from ethereumetl.jobs.export_contracts_job import ExportContractsJob
|
||||
from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
|
||||
@@ -42,11 +39,10 @@ from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_ex
|
||||
from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from web3 import Web3
|
||||
|
||||
logging_basic_config()
|
||||
logger = logging.getLogger('export_all')
|
||||
|
||||
|
||||
@@ -76,21 +72,45 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
|
||||
padded_batch_start_block = str(batch_start_block).zfill(8)
|
||||
padded_batch_end_block = str(batch_end_block).zfill(8)
|
||||
block_range = f'{padded_batch_start_block}-{padded_batch_end_block}'
|
||||
file_name_suffix = f'{padded_batch_start_block}_{padded_batch_end_block}'
|
||||
block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
|
||||
padded_batch_start_block=padded_batch_start_block,
|
||||
padded_batch_end_block=padded_batch_end_block,
|
||||
)
|
||||
file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
|
||||
padded_batch_start_block=padded_batch_start_block,
|
||||
padded_batch_end_block=padded_batch_end_block,
|
||||
)
|
||||
|
||||
# # # blocks_and_transactions # # #
|
||||
|
||||
blocks_output_dir = f'{output_dir}/blocks{partition_dir}'
|
||||
blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)
|
||||
|
||||
transactions_output_dir = f'{output_dir}/transactions{partition_dir}'
|
||||
transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)
|
||||
|
||||
blocks_file = f'{blocks_output_dir}/blocks_{file_name_suffix}.csv'
|
||||
transactions_file = f'{transactions_output_dir}/transactions_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting blocks {block_range} to {blocks_file}')
|
||||
logger.info(f'Exporting transactions from blocks {block_range} to {transactions_file}')
|
||||
blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
|
||||
blocks_output_dir=blocks_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
|
||||
transactions_output_dir=transactions_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
|
||||
block_range=block_range,
|
||||
blocks_file=blocks_file,
|
||||
))
|
||||
logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
|
||||
block_range=block_range,
|
||||
transactions_file=transactions_file,
|
||||
))
|
||||
|
||||
job = ExportBlocksJob(
|
||||
start_block=batch_start_block,
|
||||
@@ -107,11 +127,20 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
|
||||
token_transfers_file = None
|
||||
if is_log_filter_supported(provider_uri):
|
||||
token_transfers_output_dir = f'{output_dir}/token_transfers{partition_dir}'
|
||||
token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)
|
||||
|
||||
token_transfers_file = f'{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}')
|
||||
token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
|
||||
token_transfers_output_dir=token_transfers_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
|
||||
block_range=block_range,
|
||||
token_transfers_file=token_transfers_file,
|
||||
))
|
||||
|
||||
job = ExportTokenTransfersJob(
|
||||
start_block=batch_start_block,
|
||||
@@ -124,22 +153,46 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
|
||||
# # # receipts_and_logs # # #
|
||||
|
||||
cache_output_dir = f'{output_dir}/.tmp{partition_dir}'
|
||||
cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)
|
||||
|
||||
transaction_hashes_file = f'{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'
|
||||
logger.info(f'Extracting hash column from transaction file {transactions_file}')
|
||||
transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting hash column from transaction file {transactions_file}'.format(
|
||||
transactions_file=transactions_file,
|
||||
))
|
||||
extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')
|
||||
|
||||
receipts_output_dir = f'{output_dir}/receipts{partition_dir}'
|
||||
receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)
|
||||
|
||||
logs_output_dir = f'{output_dir}/logs{partition_dir}'
|
||||
logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)
|
||||
|
||||
receipts_file = f'{receipts_output_dir}/receipts_{file_name_suffix}.csv'
|
||||
logs_file = f'{logs_output_dir}/logs_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}')
|
||||
receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
|
||||
receipts_output_dir=receipts_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
|
||||
logs_output_dir=logs_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
|
||||
block_range=block_range,
|
||||
receipts_file=receipts_file,
|
||||
logs_file=logs_file,
|
||||
))
|
||||
|
||||
with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
|
||||
job = ExportReceiptsJob(
|
||||
@@ -154,15 +207,29 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
|
||||
# # # contracts # # #
|
||||
|
||||
contract_addresses_file = f'{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'
|
||||
logger.info(f'Extracting contract_address from receipt file {receipts_file}')
|
||||
contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
|
||||
receipts_file=receipts_file
|
||||
))
|
||||
extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')
|
||||
|
||||
contracts_output_dir = f'{output_dir}/contracts{partition_dir}'
|
||||
contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)
|
||||
|
||||
contracts_file = f'{contracts_output_dir}/contracts_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting contracts from blocks {block_range} to {contracts_file}')
|
||||
contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
|
||||
contracts_output_dir=contracts_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
|
||||
block_range=block_range,
|
||||
contracts_file=contracts_file,
|
||||
))
|
||||
|
||||
with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
|
||||
contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
|
||||
@@ -178,15 +245,29 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
# # # tokens # # #
|
||||
|
||||
if token_transfers_file is not None:
|
||||
token_addresses_file = f'{cache_output_dir}/token_addresses_{file_name_suffix}'
|
||||
logger.info(f'Extracting token_address from token_transfers file {token_transfers_file}')
|
||||
token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
|
||||
token_transfers_file=token_transfers_file,
|
||||
))
|
||||
extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')
|
||||
|
||||
tokens_output_dir = f'{output_dir}/tokens{partition_dir}'
|
||||
tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)
|
||||
|
||||
tokens_file = f'{tokens_output_dir}/tokens_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting tokens from blocks {block_range} to {tokens_file}')
|
||||
tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
|
||||
tokens_output_dir=tokens_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
|
||||
block_range=block_range,
|
||||
tokens_file=tokens_file,
|
||||
))
|
||||
|
||||
with smart_open(token_addresses_file, 'r') as token_addresses:
|
||||
job = ExportTokensJob(
|
||||
@@ -200,4 +281,7 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
|
||||
shutil.rmtree(os.path.dirname(cache_output_dir))
|
||||
end_time = time()
|
||||
time_diff = round(end_time - start_time, 5)
|
||||
logger.info(f'Exporting blocks {block_range} took {time_diff} seconds')
|
||||
logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
|
||||
block_range=block_range,
|
||||
time_diff=time_diff,
|
||||
))
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.json_rpc_requests import generate_get_block_by_number_json_rpc
|
||||
from ethereumetl.mappers.block_mapper import EthBlockMapper
|
||||
from ethereumetl.mappers.transaction_mapper import EthTransactionMapper
|
||||
@@ -72,7 +72,7 @@ class ExportBlocksJob(BaseJob):
|
||||
|
||||
def _export_batch(self, block_number_batch):
|
||||
blocks_rpc = list(generate_get_block_by_number_json_rpc(block_number_batch, self.export_transactions))
|
||||
response = self.batch_web3_provider.make_request(json.dumps(blocks_rpc))
|
||||
response = self.batch_web3_provider.make_batch_request(json.dumps(blocks_rpc))
|
||||
results = rpc_response_batch_to_results(response)
|
||||
blocks = [self.block_mapper.json_dict_to_block(result) for result in results]
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.json_rpc_requests import generate_get_code_json_rpc
|
||||
from ethereumetl.mappers.contract_mapper import EthContractMapper
|
||||
|
||||
@@ -58,7 +58,7 @@ class ExportContractsJob(BaseJob):
|
||||
|
||||
def _export_contracts(self, contract_addresses):
|
||||
contracts_code_rpc = list(generate_get_code_json_rpc(contract_addresses))
|
||||
response_batch = self.batch_web3_provider.make_request(json.dumps(contracts_code_rpc))
|
||||
response_batch = self.batch_web3_provider.make_batch_request(json.dumps(contracts_code_rpc))
|
||||
|
||||
contracts = []
|
||||
for response in response_batch:
|
||||
|
||||
@@ -24,7 +24,7 @@ import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.json_rpc_requests import generate_trace_block_by_number_json_rpc
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.geth_trace_mapper import EthGethTraceMapper
|
||||
from ethereumetl.utils import validate_range, rpc_response_to_result
|
||||
|
||||
@@ -62,7 +62,7 @@ class ExportGethTracesJob(BaseJob):
|
||||
|
||||
def _export_batch(self, block_number_batch):
|
||||
trace_block_rpc = list(generate_trace_block_by_number_json_rpc(block_number_batch))
|
||||
response = self.batch_web3_provider.make_request(json.dumps(trace_block_rpc))
|
||||
response = self.batch_web3_provider.make_batch_request(json.dumps(trace_block_rpc))
|
||||
|
||||
for response_item in response:
|
||||
block_number = response_item.get('id')
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
import json
|
||||
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.json_rpc_requests import generate_get_receipt_json_rpc
|
||||
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
|
||||
@@ -64,7 +64,7 @@ class ExportReceiptsJob(BaseJob):
|
||||
|
||||
def _export_receipts(self, transaction_hashes):
|
||||
receipts_rpc = list(generate_get_receipt_json_rpc(transaction_hashes))
|
||||
response = self.batch_web3_provider.make_request(json.dumps(receipts_rpc))
|
||||
response = self.batch_web3_provider.make_batch_request(json.dumps(receipts_rpc))
|
||||
results = rpc_response_batch_to_results(response)
|
||||
receipts = [self.receipt_mapper.json_dict_to_receipt(result) for result in results]
|
||||
for receipt in receipts:
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.token_transfer_mapper import EthTokenTransferMapper
|
||||
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
|
||||
from ethereumetl.service.token_transfer_extractor import EthTokenTransferExtractor, TRANSFER_EVENT_TOPIC
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.token_mapper import EthTokenMapper
|
||||
from ethereumetl.service.eth_token_service import EthTokenService
|
||||
|
||||
@@ -46,8 +46,9 @@ class ExportTokensJob(BaseJob):
|
||||
for token_address in token_addresses:
|
||||
self._export_token(token_address)
|
||||
|
||||
def _export_token(self, token_address):
|
||||
def _export_token(self, token_address, block_number=None):
|
||||
token = self.token_service.get_token(token_address)
|
||||
token.block_number = block_number
|
||||
token_dict = self.token_mapper.token_to_dict(token)
|
||||
self.item_exporter.export_item(token_dict)
|
||||
|
||||
|
||||
@@ -21,9 +21,14 @@
|
||||
# SOFTWARE.
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.utils import validate_range
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mainnet_daofork_state_changes import DAOFORK_BLOCK_NUMBER
|
||||
from ethereumetl.mappers.trace_mapper import EthTraceMapper
|
||||
from ethereumetl.service.eth_special_trace_service import EthSpecialTraceService
|
||||
|
||||
from ethereumetl.service.trace_id_calculator import calculate_trace_ids
|
||||
from ethereumetl.service.trace_status_calculator import calculate_trace_statuses
|
||||
from ethereumetl.utils import validate_range
|
||||
|
||||
|
||||
class ExportTracesJob(BaseJob):
|
||||
@@ -34,7 +39,9 @@ class ExportTracesJob(BaseJob):
|
||||
batch_size,
|
||||
web3,
|
||||
item_exporter,
|
||||
max_workers):
|
||||
max_workers,
|
||||
include_genesis_traces=False,
|
||||
include_daofork_traces=False):
|
||||
validate_range(start_block, end_block)
|
||||
self.start_block = start_block
|
||||
self.end_block = end_block
|
||||
@@ -47,6 +54,10 @@ class ExportTracesJob(BaseJob):
|
||||
|
||||
self.trace_mapper = EthTraceMapper()
|
||||
|
||||
self.special_trace_service = EthSpecialTraceService()
|
||||
self.include_genesis_traces = include_genesis_traces
|
||||
self.include_daofork_traces = include_daofork_traces
|
||||
|
||||
def _start(self):
|
||||
self.item_exporter.open()
|
||||
|
||||
@@ -63,12 +74,30 @@ class ExportTracesJob(BaseJob):
|
||||
assert len(block_number_batch) == 1
|
||||
block_number = block_number_batch[0]
|
||||
|
||||
all_traces = []
|
||||
|
||||
if self.include_genesis_traces and 0 in block_number_batch:
|
||||
genesis_traces = self.special_trace_service.get_genesis_traces()
|
||||
all_traces.extend(genesis_traces)
|
||||
|
||||
if self.include_daofork_traces and DAOFORK_BLOCK_NUMBER in block_number_batch:
|
||||
daofork_traces = self.special_trace_service.get_daofork_traces()
|
||||
all_traces.extend(daofork_traces)
|
||||
|
||||
# TODO: Change to traceFilter when this issue is fixed
|
||||
# https://github.com/paritytech/parity-ethereum/issues/9822
|
||||
json_traces = self.web3.parity.traceBlock(block_number)
|
||||
|
||||
for json_trace in json_traces:
|
||||
trace = self.trace_mapper.json_dict_to_trace(json_trace)
|
||||
if json_traces is None:
|
||||
raise ValueError('Response from the node is None. Is the node fully synced?')
|
||||
|
||||
traces = [self.trace_mapper.json_dict_to_trace(json_trace) for json_trace in json_traces]
|
||||
all_traces.extend(traces)
|
||||
|
||||
calculate_trace_statuses(all_traces)
|
||||
calculate_trace_ids(all_traces)
|
||||
|
||||
for trace in all_traces:
|
||||
self.item_exporter.export_item(self.trace_mapper.trace_to_dict(trace))
|
||||
|
||||
def _end(self):
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
BLOCK_FIELDS_TO_EXPORT = [
|
||||
'number',
|
||||
@@ -55,7 +55,8 @@ TRANSACTION_FIELDS_TO_EXPORT = [
|
||||
'value',
|
||||
'gas',
|
||||
'gas_price',
|
||||
'input'
|
||||
'input',
|
||||
'block_timestamp'
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -21,14 +21,15 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'address',
|
||||
'bytecode',
|
||||
'function_sighashes',
|
||||
'is_erc20',
|
||||
'is_erc721'
|
||||
'is_erc721',
|
||||
'block_number',
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'block_number',
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
RECEIPT_FIELDS_TO_EXPORT = [
|
||||
'transaction_hash',
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'token_address',
|
||||
|
||||
@@ -21,14 +21,15 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'address',
|
||||
'symbol',
|
||||
'name',
|
||||
'decimals',
|
||||
'total_supply'
|
||||
'total_supply',
|
||||
'block_number'
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'block_number',
|
||||
@@ -40,6 +40,8 @@ FIELDS_TO_EXPORT = [
|
||||
'subtraces',
|
||||
'trace_address',
|
||||
'error',
|
||||
'status',
|
||||
'trace_id',
|
||||
]
|
||||
|
||||
|
||||
|
||||
85
ethereumetl/jobs/extract_contracts_job.py
Normal file
85
ethereumetl/jobs/extract_contracts_job.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.domain.contract import EthContract
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.contract_mapper import EthContractMapper
|
||||
|
||||
from ethereumetl.service.eth_contract_service import EthContractService
|
||||
from ethereumetl.utils import to_int_or_none
|
||||
|
||||
|
||||
# Extract contracts
|
||||
class ExtractContractsJob(BaseJob):
|
||||
def __init__(
|
||||
self,
|
||||
traces_iterable,
|
||||
batch_size,
|
||||
max_workers,
|
||||
item_exporter):
|
||||
self.traces_iterable = traces_iterable
|
||||
|
||||
self.batch_work_executor = BatchWorkExecutor(batch_size, max_workers)
|
||||
self.item_exporter = item_exporter
|
||||
|
||||
self.contract_service = EthContractService()
|
||||
self.contract_mapper = EthContractMapper()
|
||||
|
||||
def _start(self):
|
||||
self.item_exporter.open()
|
||||
|
||||
def _export(self):
|
||||
self.batch_work_executor.execute(self.traces_iterable, self._extract_contracts)
|
||||
|
||||
def _extract_contracts(self, traces):
|
||||
for trace in traces:
|
||||
trace['status'] = to_int_or_none(trace.get('status'))
|
||||
trace['block_number'] = to_int_or_none(trace.get('block_number'))
|
||||
|
||||
contract_creation_traces = [trace for trace in traces
|
||||
if trace.get('trace_type') == 'create' and trace.get('to_address') is not None
|
||||
and len(trace.get('to_address')) > 0 and trace.get('status') == 1]
|
||||
|
||||
contracts = []
|
||||
for trace in contract_creation_traces:
|
||||
contract = EthContract()
|
||||
contract.address = trace.get('to_address')
|
||||
bytecode = trace.get('output')
|
||||
contract.bytecode = bytecode
|
||||
contract.block_number = trace.get('block_number')
|
||||
|
||||
function_sighashes = self.contract_service.get_function_sighashes(bytecode)
|
||||
|
||||
contract.function_sighashes = function_sighashes
|
||||
contract.is_erc20 = self.contract_service.is_erc20_contract(function_sighashes)
|
||||
contract.is_erc721 = self.contract_service.is_erc721_contract(function_sighashes)
|
||||
|
||||
contracts.append(contract)
|
||||
|
||||
for contract in contracts:
|
||||
self.item_exporter.export_item(self.contract_mapper.contract_to_dict(contract))
|
||||
|
||||
def _end(self):
|
||||
self.batch_work_executor.shutdown()
|
||||
self.item_exporter.close()
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.trace_mapper import EthTraceMapper
|
||||
from ethereumetl.mappers.geth_trace_mapper import EthGethTraceMapper
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
# SOFTWARE.
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.token_transfer_mapper import EthTokenTransferMapper
|
||||
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
|
||||
from ethereumetl.service.token_transfer_extractor import EthTokenTransferExtractor
|
||||
|
||||
@@ -21,10 +21,22 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli.export_contracts import export_contracts
|
||||
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
|
||||
|
||||
|
||||
class ExtractTokensJob(ExportTokensJob):
|
||||
def __init__(self, web3, item_exporter, contracts_iterable, max_workers):
|
||||
super().__init__(web3, item_exporter, [], max_workers)
|
||||
self.contracts_iterable = contracts_iterable
|
||||
|
||||
def _export(self):
|
||||
self.batch_work_executor.execute(self.contracts_iterable, self._export_tokens_from_contracts)
|
||||
|
||||
def _export_tokens_from_contracts(self, contracts):
|
||||
tokens = [contract for contract in contracts if contract.get('is_erc20') or contract.get('is_erc721')]
|
||||
|
||||
for token in tokens:
|
||||
self._export_token(token_address=token['address'], block_number=token['block_number'])
|
||||
|
||||
|
||||
print('========================================================================================')
|
||||
print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
|
||||
print('========================================================================================')
|
||||
|
||||
export_contracts()
|
||||
@@ -1,5 +0,0 @@
|
||||
import logging
|
||||
|
||||
|
||||
def logging_basic_config():
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s [%(levelname)s] - %(message)s')
|
||||
149
ethereumetl/mainnet_daofork_state_changes.py
Normal file
149
ethereumetl/mainnet_daofork_state_changes.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# https://github.com/ethereum/EIPs/blob/master/EIPS/eip-779.md
|
||||
# https://blog.ethereum.org/2016/07/20/hard-fork-completed/
|
||||
# https://gist.github.com/gavofyork/af747a034fbee2920f862ed352d32347
|
||||
# https://blog.ethereum.org/2016/07/15/to-fork-or-not-to-fork/
|
||||
|
||||
DAOFORK_BLOCK_NUMBER = 1920000
|
||||
WITHDRAW_DAO_ADDRESS = '0xbf4ed7b27f1d666546e30d74d50d173d20bca754'
|
||||
|
||||
MAINNET_DAOFORK_STATE_CHANGES = [
|
||||
# from_address, to_address, value
|
||||
("0x005f5cee7a43331d5a3d3eec71305925a62f34b6", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x0101f3be8ebb4bbd39a2e3b9a3639d4259832fd9", WITHDRAW_DAO_ADDRESS, 559384955979606013894),
|
||||
("0x057b56736d32b86616a10f619859c6cd6f59092a", WITHDRAW_DAO_ADDRESS, 9900012824972102),
|
||||
("0x06706dd3f2c9abf0a21ddcc6941d9b86f0596936", WITHDRAW_DAO_ADDRESS, 1428573279216753537),
|
||||
("0x0737a6b837f97f46ebade41b9bc3e1c509c85c53", WITHDRAW_DAO_ADDRESS, 7144077587762826223),
|
||||
("0x07f5c1e1bc2c93e0402f23341973a0e043f7bf8a", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x0e0da70933f4c7849fc0d203f5d1d43b9ae4532d", WITHDRAW_DAO_ADDRESS, 19173240336954131945545),
|
||||
("0x0ff30d6de14a8224aa97b78aea5388d1c51c1f00", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x12e626b0eebfe86a56d633b9864e389b45dcb260", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x1591fc0f688c81fbeb17f5426a162a7024d430c2", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x17802f43a0137c506ba92291391a8a8f207f487d", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x1975bd06d486162d5dc297798dfc41edd5d160a7", WITHDRAW_DAO_ADDRESS, 989001281201758473335),
|
||||
("0x1ca6abd14d30affe533b24d7a21bff4c2d5e1f3b", WITHDRAW_DAO_ADDRESS, 76761842290232377901),
|
||||
("0x1cba23d343a983e9b5cfd19496b9a9701ada385f", WITHDRAW_DAO_ADDRESS, 68587370259945226),
|
||||
("0x200450f06520bdd6c527622a273333384d870efb", WITHDRAW_DAO_ADDRESS, 1250001619314659344457),
|
||||
("0x21c7fdb9ed8d291d79ffd82eb2c4356ec0d81241", WITHDRAW_DAO_ADDRESS, 27428797178668633),
|
||||
("0x23b75c2f6791eef49c69684db4c6c1f93bf49a50", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x24c4d950dfd4dd1902bbed3508144a54542bba94", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x253488078a4edf4d6f42f113d1e62836a942cf1a", WITHDRAW_DAO_ADDRESS, 3486036451558542464),
|
||||
("0x27b137a85656544b1ccb5a0f2e561a5703c6a68f", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x2a5ed960395e2a49b1c758cef4aa15213cfd874c", WITHDRAW_DAO_ADDRESS, 18693039890011849),
|
||||
("0x2b3455ec7fedf16e646268bf88846bd7a2319bb2", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x2c19c7f9ae8b751e37aeb2d93a699722395ae18f", WITHDRAW_DAO_ADDRESS, 8519214441755701),
|
||||
("0x304a554a310c7e546dfe434669c62820b7d83490", WITHDRAW_DAO_ADDRESS, 3642408527612792706899331),
|
||||
("0x319f70bab6845585f412ec7724b744fec6095c85", WITHDRAW_DAO_ADDRESS, 90658),
|
||||
("0x35a051a0010aba705c9008d7a7eff6fb88f6ea7b", WITHDRAW_DAO_ADDRESS, 15276059789372406985),
|
||||
("0x3ba4d81db016dc2890c81f3acec2454bff5aada5", WITHDRAW_DAO_ADDRESS, 1),
|
||||
("0x3c02a7bc0391e86d91b7d144e61c2c01a25a79c5", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x40b803a9abce16f50f36a77ba41180eb90023925", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x440c59b325d2997a134c2c7c60a8c61611212bad", WITHDRAW_DAO_ADDRESS, 266854104538362875475),
|
||||
("0x4486a3d68fac6967006d7a517b889fd3f98c102b", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x4613f3bca5c44ea06337a9e439fbc6d42e501d0a", WITHDRAW_DAO_ADDRESS, 28927603152430302650042),
|
||||
("0x47e7aa56d6bdf3f36be34619660de61275420af8", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x4863226780fe7c0356454236d3b1c8792785748d", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x492ea3bb0f3315521c31f273e565b868fc090f17", WITHDRAW_DAO_ADDRESS, 367380383063135344585),
|
||||
("0x4cb31628079fb14e4bc3cd5e30c2f7489b00960c", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x4deb0033bb26bc534b197e61d19e0733e5679784", WITHDRAW_DAO_ADDRESS, 1256101627216914882057),
|
||||
("0x4fa802324e929786dbda3b8820dc7834e9134a2a", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x4fd6ace747f06ece9c49699c7cabc62d02211f75", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x51e0ddd9998364a2eb38588679f0d2c42653e4a6", WITHDRAW_DAO_ADDRESS, 10000012954517274755),
|
||||
("0x52c5317c848ba20c7504cb2c8052abd1fde29d03", WITHDRAW_DAO_ADDRESS, 1996002585721648041229),
|
||||
("0x542a9515200d14b68e934e9830d91645a980dd7a", WITHDRAW_DAO_ADDRESS, 12548793143344641481996),
|
||||
("0x5524c55fb03cf21f549444ccbecb664d0acad706", WITHDRAW_DAO_ADDRESS, 6773243673260677597543),
|
||||
("0x579a80d909f346fbfb1189493f521d7f48d52238", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x58b95c9a9d5d26825e70a82b6adb139d3fd829eb", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x5c6e67ccd5849c0d29219c4f95f1a7a93b3f5dc5", WITHDRAW_DAO_ADDRESS, 1),
|
||||
("0x5c8536898fbb74fc7445814902fd08422eac56d0", WITHDRAW_DAO_ADDRESS, 205100000000392887672),
|
||||
("0x5d2b2e6fcbe3b11d26b525e085ff818dae332479", WITHDRAW_DAO_ADDRESS, 5000006477258637377),
|
||||
("0x5dc28b15dffed94048d73806ce4b7a4612a1d48f", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x5f9f3392e9f62f63b8eac0beb55541fc8627f42c", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x6131c42fa982e56929107413a9d526fd99405560", WITHDRAW_DAO_ADDRESS, 2121837249362469256186),
|
||||
("0x6231b6d0d5e77fe001c2a460bd9584fee60d409b", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x627a0a960c079c21c34f7612d5d230e01b4ad4c7", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x63ed5a272de2f6d968408b4acb9024f4cc208ebf", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x6966ab0d485353095148a2155858910e0965b6f9", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x6b0c4d41ba9ab8d8cfb5d379c69a612f2ced8ecb", WITHDRAW_DAO_ADDRESS, 854763543),
|
||||
("0x6d87578288b6cb5549d5076a207456a1f6a63dc0", WITHDRAW_DAO_ADDRESS, 1944767821345229848),
|
||||
("0x6f6704e5a10332af6672e50b3d9754dc460dfa4d", WITHDRAW_DAO_ADDRESS, 41173345768012804300),
|
||||
("0x7602b46df5390e432ef1c307d4f2c9ff6d65cc97", WITHDRAW_DAO_ADDRESS, 369231179004682274248),
|
||||
("0x779543a0491a837ca36ce8c635d6154e3c4911a6", WITHDRAW_DAO_ADDRESS, 100000000000000000),
|
||||
("0x77ca7b50b6cd7e2f3fa008e24ab793fd56cb15f6", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x782495b7b3355efb2833d56ecb34dc22ad7dfcc4", WITHDRAW_DAO_ADDRESS, 250000323862931868891),
|
||||
("0x807640a13483f8ac783c557fcdf27be11ea4ac7a", WITHDRAW_DAO_ADDRESS, 89472700),
|
||||
("0x8163e7fb499e90f8544ea62bbf80d21cd26d9efd", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x84ef4b2357079cd7a7c69fd7a37cd0609a679106", WITHDRAW_DAO_ADDRESS, 598974326560793095813484),
|
||||
("0x86af3e9626fce1957c82e88cbf04ddf3a2ed7915", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x8d9edb3054ce5c5774a420ac37ebae0ac02343c6", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x914d1b8b43e92723e64fd0a06f5bdb8dd9b10c79", WITHDRAW_DAO_ADDRESS, 285714295714285714286),
|
||||
("0x97f43a37f595ab5dd318fb46e7a155eae057317a", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x9aa008f65de0b923a2a4f02012ad034a5e2e2192", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x9c15b54878ba618f494b38f0ae7443db6af648ba", WITHDRAW_DAO_ADDRESS, 2236999142516500888),
|
||||
("0x9c50426be05db97f5d64fc54bf89eff947f0a321", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0x9da397b9e80755301a3b32173283a91c0ef6c87e", WITHDRAW_DAO_ADDRESS, 934889382511061152962),
|
||||
("0x9ea779f907f0b315b364b0cfc39a0fde5b02a416", WITHDRAW_DAO_ADDRESS, 15841461690131427090010),
|
||||
("0x9f27daea7aca0aa0446220b98d028715e3bc803d", WITHDRAW_DAO_ADDRESS, 99998647723253121277),
|
||||
("0x9fcd2deaff372a39cc679d5c5e4de7bafb0b1339", WITHDRAW_DAO_ADDRESS, 1409336722195117395464),
|
||||
("0xa2f1ccba9395d7fcb155bba8bc92db9bafaeade7", WITHDRAW_DAO_ADDRESS, 5000006477258637377),
|
||||
("0xa3acf3a1e16b1d7c315e23510fdd7847b48234f6", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xa5dc5acd6a7968a4554d89d65e59b7fd3bff0f90", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xa82f360a8d3455c5c41366975bde739c37bfeb8a", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xac1ecab32727358dba8962a0f3b261731aad9723", WITHDRAW_DAO_ADDRESS, 1),
|
||||
("0xaccc230e8a6e5be9160b8cdf2864dd2a001c28b6", WITHDRAW_DAO_ADDRESS, 23997787866533545896),
|
||||
("0xacd87e28b0c9d1254e868b81cba4cc20d9a32225", WITHDRAW_DAO_ADDRESS, 207153967008322399135),
|
||||
("0xadf80daec7ba8dcf15392f1ac611fff65d94f880", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xaeeb8ff27288bdabc0fa5ebb731b6f409507516c", WITHDRAW_DAO_ADDRESS, 859189750496835322093),
|
||||
("0xb136707642a4ea12fb4bae820f03d2562ebff487", WITHDRAW_DAO_ADDRESS, 7277385711515429122911683),
|
||||
("0xb2c6f0dfbb716ac562e2d85d6cb2f8d5ee87603e", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xb3fb0e5aba0e20e5c49d252dfd30e102b171a425", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xb52042c8ca3f8aa246fa79c3feaa3d959347c0ab", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xb9637156d330c0d605a791f1c31ba5890582fe1c", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xbb9bc244d798123fde783fcc1c72d3bb8c189413", WITHDRAW_DAO_ADDRESS, 1200000000000000001),
|
||||
("0xbc07118b9ac290e4622f5e77a0853539789effbe", WITHDRAW_DAO_ADDRESS, 5634097608979247392143),
|
||||
("0xbcf899e6c7d9d5a215ab1e3444c86806fa854c76", WITHDRAW_DAO_ADDRESS, 30696803822257124360133),
|
||||
("0xbe8539bfe837b67d1282b2b1d61c3f723966f049", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xc4bbd073882dd2add2424cf47d35213405b01324", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xca544e5c4687d109611d0f8f928b53a25af72448", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xcbb9d3703e651b0d496cdefb8b92c25aeb2171f7", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xcc34673c6c40e791051898567a1222daf90be287", WITHDRAW_DAO_ADDRESS, 60000077727103648),
|
||||
("0xceaeb481747ca6c540a000c1f3641f8cef161fa7", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xd131637d5275fd1a68a3200f4ad25c71a2a9522e", WITHDRAW_DAO_ADDRESS, 118886510785155274580),
|
||||
("0xd164b088bd9108b60d0ca3751da4bceb207b0782", WITHDRAW_DAO_ADDRESS, 1000001295451727475566),
|
||||
("0xd1ac8b1ef1b69ff51d1d401a476e7e612414f091", WITHDRAW_DAO_ADDRESS, 18387737083543350),
|
||||
("0xd343b217de44030afaa275f54d31a9317c7f441e", WITHDRAW_DAO_ADDRESS, 5192307692307692307692),
|
||||
("0xd4fe7bc31cedb7bfb8a345f31e668033056b2728", WITHDRAW_DAO_ADDRESS, 110000142499690430),
|
||||
("0xd9aef3a1e38a39c16b31d1ace71bca8ef58d315b", WITHDRAW_DAO_ADDRESS, 100000129545172747556),
|
||||
("0xda2fef9e4a3230988ff17df2165440f37e8b1708", WITHDRAW_DAO_ADDRESS, 73722042576599901129491),
|
||||
("0xdbe9b615a3ae8709af8b93336ce9b477e4ac0940", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xe308bd1ac5fda103967359b2712dd89deffb7973", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xe4ae1efdfc53b73893af49113d8694a057b9c0d1", WITHDRAW_DAO_ADDRESS, 5000006477258637377),
|
||||
("0xec8e57756626fdc07c63ad2eafbd28d08e7b0ca5", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xecd135fa4f61a655311e86238c92adcd779555d2", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xf0b1aa0eb660754448a7937c022e30aa692fe0c5", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xf1385fb24aad0cd7432824085e42aff90886fef5", WITHDRAW_DAO_ADDRESS, 0),
|
||||
("0xf14c14075d6c4ed84b86798af0956deef67365b5", WITHDRAW_DAO_ADDRESS, 2123311222366559138),
|
||||
("0xf4c64518ea10f995918a454158c6b61407ea345c", WITHDRAW_DAO_ADDRESS, 269565591797974102411594),
|
||||
("0xfe24cdd8648121a43a7c86d289be4dd2951ed49f", WITHDRAW_DAO_ADDRESS, 269833661813680507459)
|
||||
]
|
||||
8921
ethereumetl/mainnet_genesis_alloc.py
Normal file
8921
ethereumetl/mainnet_genesis_alloc.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -55,7 +55,8 @@ class EthBlockMapper(object):
|
||||
|
||||
if 'transactions' in json_dict:
|
||||
block.transactions = [
|
||||
self.transaction_mapper.json_dict_to_transaction(tx) for tx in json_dict['transactions']
|
||||
self.transaction_mapper.json_dict_to_transaction(tx, block_timestamp=block.timestamp)
|
||||
for tx in json_dict['transactions']
|
||||
if isinstance(tx, dict)
|
||||
]
|
||||
|
||||
|
||||
@@ -40,5 +40,6 @@ class EthContractMapper(object):
|
||||
'bytecode': contract.bytecode,
|
||||
'function_sighashes': contract.function_sighashes,
|
||||
'is_erc20': contract.is_erc20,
|
||||
'is_erc721': contract.is_erc721
|
||||
'is_erc721': contract.is_erc721,
|
||||
'block_number': contract.block_number
|
||||
}
|
||||
|
||||
@@ -29,5 +29,6 @@ class EthTokenMapper(object):
|
||||
'symbol': token.symbol,
|
||||
'name': token.name,
|
||||
'decimals': token.decimals,
|
||||
'total_supply': token.total_supply
|
||||
'total_supply': token.total_supply,
|
||||
'block_number': token.block_number
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
|
||||
from ethereumetl.domain.trace import EthTrace
|
||||
from ethereumetl.mainnet_daofork_state_changes import DAOFORK_BLOCK_NUMBER
|
||||
from ethereumetl.utils import hex_to_dec, to_normalized_address
|
||||
|
||||
|
||||
@@ -93,6 +94,36 @@ class EthTraceMapper(object):
|
||||
|
||||
return traces
|
||||
|
||||
def genesis_alloc_to_trace(self, allocation):
|
||||
address = allocation[0]
|
||||
value = allocation[1]
|
||||
|
||||
trace = EthTrace()
|
||||
|
||||
trace.block_number = 0
|
||||
trace.to_address = address
|
||||
trace.value = value
|
||||
trace.trace_type = 'genesis'
|
||||
trace.status = 1
|
||||
|
||||
return trace
|
||||
|
||||
def daofork_state_change_to_trace(self, state_change):
|
||||
from_address = state_change[0]
|
||||
to_address = state_change[1]
|
||||
value = state_change[2]
|
||||
|
||||
trace = EthTrace()
|
||||
|
||||
trace.block_number = DAOFORK_BLOCK_NUMBER
|
||||
trace.from_address = from_address
|
||||
trace.to_address = to_address
|
||||
trace.value = value
|
||||
trace.trace_type = 'daofork'
|
||||
trace.status = 1
|
||||
|
||||
return trace
|
||||
|
||||
def _iterate_transaction_trace(self, block_number, tx_index, tx_trace, trace_address=[]):
|
||||
trace = EthTrace()
|
||||
|
||||
@@ -157,4 +188,6 @@ class EthTraceMapper(object):
|
||||
'subtraces': trace.subtraces,
|
||||
'trace_address': trace.trace_address,
|
||||
'error': trace.error,
|
||||
'status': trace.status,
|
||||
'trace_id': trace.trace_id,
|
||||
}
|
||||
|
||||
@@ -26,12 +26,13 @@ from ethereumetl.utils import hex_to_dec, to_normalized_address
|
||||
|
||||
|
||||
class EthTransactionMapper(object):
|
||||
def json_dict_to_transaction(self, json_dict):
|
||||
def json_dict_to_transaction(self, json_dict, **kwargs):
|
||||
transaction = EthTransaction()
|
||||
transaction.hash = json_dict.get('hash')
|
||||
transaction.nonce = hex_to_dec(json_dict.get('nonce'))
|
||||
transaction.block_hash = json_dict.get('blockHash')
|
||||
transaction.block_number = hex_to_dec(json_dict.get('blockNumber'))
|
||||
transaction.block_timestamp = kwargs.get('block_timestamp')
|
||||
transaction.transaction_index = hex_to_dec(json_dict.get('transactionIndex'))
|
||||
transaction.from_address = to_normalized_address(json_dict.get('from'))
|
||||
transaction.to_address = to_normalized_address(json_dict.get('to'))
|
||||
@@ -48,6 +49,7 @@ class EthTransactionMapper(object):
|
||||
'nonce': transaction.nonce,
|
||||
'block_hash': transaction.block_hash,
|
||||
'block_number': transaction.block_number,
|
||||
'block_timestamp': transaction.block_timestamp,
|
||||
'transaction_index': transaction.transaction_index,
|
||||
'from_address': transaction.from_address,
|
||||
'to_address': transaction.to_address,
|
||||
|
||||
21
ethereumetl/misc/__init__.py
Normal file
21
ethereumetl/misc/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
2
ethereumetl/misc/retriable_value_error.py
Normal file
2
ethereumetl/misc/retriable_value_error.py
Normal file
@@ -0,0 +1,2 @@
|
||||
class RetriableValueError(ValueError):
|
||||
pass
|
||||
85
ethereumetl/misc_utils.py
Normal file
85
ethereumetl/misc_utils.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import contextlib
|
||||
import csv
|
||||
import json
|
||||
|
||||
import six
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import get_file_handle, smart_open
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_item_iterable(input_file):
|
||||
fh = get_file_handle(input_file, 'r')
|
||||
|
||||
if input_file.endswith('.csv'):
|
||||
set_max_field_size_limit()
|
||||
reader = csv.DictReader(fh)
|
||||
else:
|
||||
reader = (json.loads(line) for line in fh)
|
||||
|
||||
try:
|
||||
yield reader
|
||||
finally:
|
||||
fh.close()
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_item_sink(output_file):
|
||||
fh = get_file_handle(output_file, 'w')
|
||||
|
||||
if output_file.endswith('.csv'):
|
||||
set_max_field_size_limit()
|
||||
|
||||
writer = None
|
||||
|
||||
def sink(item):
|
||||
nonlocal writer
|
||||
if writer is None:
|
||||
fields = list(six.iterkeys(item))
|
||||
writer = csv.DictWriter(fh, fieldnames=fields, extrasaction='ignore')
|
||||
writer.writeheader()
|
||||
writer.writerow(item)
|
||||
else:
|
||||
def sink(item):
|
||||
fh.write(json.dumps(item) + '\n')
|
||||
|
||||
try:
|
||||
yield sink
|
||||
finally:
|
||||
fh.close()
|
||||
|
||||
|
||||
def filter_items(input_file, output_file, predicate):
|
||||
with get_item_iterable(input_file) as item_iterable, get_item_sink(output_file) as sink:
|
||||
for item in item_iterable:
|
||||
if predicate(item):
|
||||
sink(item)
|
||||
|
||||
|
||||
def extract_field(input_file, output_file, field):
|
||||
with get_item_iterable(input_file) as item_iterable, smart_open(output_file, 'w') as output:
|
||||
for item in item_iterable:
|
||||
output.write(item[field] + '\n')
|
||||
@@ -28,22 +28,22 @@ from web3 import IPCProvider, HTTPProvider
|
||||
from ethereumetl.providers.ipc import BatchIPCProvider
|
||||
from ethereumetl.providers.rpc import BatchHTTPProvider
|
||||
|
||||
DEFAULT_IPC_TIMEOUT = 60
|
||||
DEFAULT_HTTP_REQUEST_KWARGS = {'timeout': 60}
|
||||
DEFAULT_TIMEOUT = 60
|
||||
|
||||
|
||||
def get_provider_from_uri(uri_string, batch=False):
|
||||
def get_provider_from_uri(uri_string, timeout=DEFAULT_TIMEOUT, batch=False):
|
||||
uri = urlparse(uri_string)
|
||||
if uri.scheme == 'file':
|
||||
if batch:
|
||||
return BatchIPCProvider(uri.path, timeout=DEFAULT_IPC_TIMEOUT)
|
||||
return BatchIPCProvider(uri.path, timeout=timeout)
|
||||
else:
|
||||
return IPCProvider(uri.path, timeout=DEFAULT_IPC_TIMEOUT)
|
||||
return IPCProvider(uri.path, timeout=timeout)
|
||||
elif uri.scheme == 'http' or uri.scheme == 'https':
|
||||
request_kwargs = {'timeout': timeout}
|
||||
if batch:
|
||||
return BatchHTTPProvider(uri_string, request_kwargs=DEFAULT_HTTP_REQUEST_KWARGS)
|
||||
return BatchHTTPProvider(uri_string, request_kwargs=request_kwargs)
|
||||
else:
|
||||
return HTTPProvider(uri_string, request_kwargs=DEFAULT_HTTP_REQUEST_KWARGS)
|
||||
return HTTPProvider(uri_string, request_kwargs=request_kwargs)
|
||||
else:
|
||||
raise ValueError('Unknown uri scheme {}'.format(uri_string))
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user