mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-10 22:37:53 -05:00
Compare commits
50 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbd57fc079 | ||
|
|
8204c0827d | ||
|
|
46b91a9ff2 | ||
|
|
b5fd64bdca | ||
|
|
d8547e9c7c | ||
|
|
7ef53859c1 | ||
|
|
e38d1c1f2f | ||
|
|
43fe6b49b3 | ||
|
|
db274c8a85 | ||
|
|
69247042a4 | ||
|
|
218e1e4356 | ||
|
|
5e0fc8cc75 | ||
|
|
77efda5106 | ||
|
|
ece0b7f422 | ||
|
|
b31b76a73a | ||
|
|
0cb7eb60b5 | ||
|
|
02943f7caf | ||
|
|
b844b95868 | ||
|
|
4d305a284f | ||
|
|
e161e6ef13 | ||
|
|
9b917b8ddd | ||
|
|
383caf8331 | ||
|
|
c61e91235f | ||
|
|
0e4b4a894b | ||
|
|
d58c1ebda7 | ||
|
|
f0bf07e60c | ||
|
|
efe7acdc13 | ||
|
|
20404eca9e | ||
|
|
435cbe0a74 | ||
|
|
b606e22cd5 | ||
|
|
4943b0b795 | ||
|
|
eed2068def | ||
|
|
313b4b1237 | ||
|
|
ad6149155e | ||
|
|
c55c0f68dc | ||
|
|
b031b04bc7 | ||
|
|
b314f1ed0c | ||
|
|
61eb2e6e21 | ||
|
|
9f62e7ecea | ||
|
|
4da7e7b23f | ||
|
|
de72ba3511 | ||
|
|
3aabf9aa54 | ||
|
|
284755bafc | ||
|
|
23133594e8 | ||
|
|
ca54ef6c4b | ||
|
|
836f30e198 | ||
|
|
1c6508f15d | ||
|
|
a4d6f8fcb1 | ||
|
|
bc79d7d9bf | ||
|
|
7fdcf0f7b7 |
14
.readthedocs.yaml
Normal file
14
.readthedocs.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Read the Docs configuration file for MkDocs projects
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Set the version of Python and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
mkdocs:
|
||||
configuration: mkdocs.yml
|
||||
4
LICENSE
4
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
Copyright (c) 2018-2025 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
12
README.md
12
README.md
@@ -1,9 +1,9 @@
|
||||
# Ethereum ETL
|
||||
|
||||
[](https://travis-ci.com/github/blockchain-etl/ethereum-etl)
|
||||
[](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://github.com/blockchain-etl/ethereum-etl/blob/develop/LICENSE)
|
||||
[](https://t.me/BlockchainETL)
|
||||
[](https://discord.gg/tRKG7zGKtF)
|
||||
[](https://x.com/EthereumETL)
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
@@ -27,7 +27,7 @@ Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](
|
||||
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md##export_token_transfers)):
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md#export_token_transfers)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
@@ -78,7 +78,7 @@ For the latest version, check out the repo and call
|
||||
```bash
|
||||
> pip3 install -e .[dev,streaming]
|
||||
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
|
||||
> export PROVIDER_URL=<your_porvider_uri>
|
||||
> export PROVIDER_URL=<your_provider_uri>
|
||||
> pytest -vv
|
||||
```
|
||||
|
||||
@@ -109,9 +109,9 @@ For the latest version, check out the repo and call
|
||||
> echo "Stream to console"
|
||||
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
|
||||
> echo "Stream to Pub/Sub"
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your_project>/topics/crypto_ethereum
|
||||
|
||||
If running on Apple M1 chip add the `--platform linux/x86_64` option to the `build` and `run` commands e.g.:
|
||||
If running on an Apple M1 chip, add the `--platform linux/x86_64` option to the `build` and `run` commands, e.g.:
|
||||
|
||||
```
|
||||
docker build --platform linux/x86_64 -t ethereum-etl:latest .
|
||||
|
||||
@@ -45,7 +45,7 @@ class BaseItemExporter(object):
|
||||
self._configure(kwargs)
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
"""Configure the exporter by poping options from the ``options`` dict.
|
||||
"""Configure the exporter by popping options from the ``options`` dict.
|
||||
If dont_fail is set, it won't raise an exception on unexpected options
|
||||
(useful for using with keyword arguments in subclasses constructors)
|
||||
"""
|
||||
|
||||
@@ -30,13 +30,18 @@
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class SimpleItemConverter:
|
||||
|
||||
def __init__(self, field_converters=None):
|
||||
self.field_converters = field_converters
|
||||
|
||||
def convert_item(self, item):
|
||||
return {
|
||||
key: self.convert_field(key, value) for key, value in item.items()
|
||||
}
|
||||
|
||||
def convert_field(self, key, value):
|
||||
return value
|
||||
if self.field_converters is not None and key in self.field_converters:
|
||||
return self.field_converters[key](value)
|
||||
else:
|
||||
return value
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# Contact
|
||||
|
||||
- [D5 Discord Server](https://discord.gg/wukrezR)
|
||||
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
## Exporting the Blockchain
|
||||
|
||||
If you'd like to have blockchain data set up and hosted for you, [get in touch with us at D5](https://d5.ai/?ref=ethereumetl).
|
||||
|
||||
1. Install python 3.5.3+: [https://www.python.org/downloads/](https://www.python.org/downloads/)
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Google BiqQuery
|
||||
# Google BigQuery
|
||||
|
||||
## Querying in BigQuery
|
||||
|
||||
@@ -16,4 +16,4 @@ Read [this article](https://medium.com/google-cloud/building-token-recommender-i
|
||||
|
||||
### Awesome BigQuery Views
|
||||
|
||||
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)
|
||||
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
With 1,700+ likes on GitHub, Ethereum ETL is the most popular open source project for Ethereum data.
|
||||
With 1,700+ likes on GitHub, Ethereum ETL is the most popular open-source project for Ethereum data.
|
||||
|
||||
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
|
||||
|
||||
@@ -17,8 +17,31 @@ Easily export:
|
||||
* Receipts
|
||||
* Logs
|
||||
* Contracts
|
||||
* Internal transactions
|
||||
* Internal transactions (traces)
|
||||
|
||||
## Advanced Features
|
||||
|
||||
* Stream blockchain data to Pub/Sub, Postgres, or other destinations in real-time
|
||||
* Filter and transform data using flexible command-line options
|
||||
* Support for multiple Ethereum node providers (Geth, Parity, Infura, etc.)
|
||||
* Handles chain reorganizations through configurable lag
|
||||
* Export data by block range or by date
|
||||
* Scalable architecture with configurable batch sizes and worker counts
|
||||
|
||||
## Use Cases
|
||||
|
||||
* Data analysis and visualization
|
||||
* Machine learning on blockchain data
|
||||
* Building analytics dashboards
|
||||
* Market research and token analysis
|
||||
* Compliance and audit reporting
|
||||
* Academic research on blockchain economics
|
||||
|
||||
## Projects using Ethereum ETL
|
||||
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
|
||||
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum
|
||||
* [Ethereum Blockchain ETL on GCP](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics) - Official Google Cloud reference architecture
|
||||
|
||||
## Getting Started
|
||||
|
||||
Check the [Quickstart](quickstart.md) guide to begin using Ethereum ETL or explore the [Commands](commands.md) page for detailed usage instructions.
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing in the `tokens`
|
||||
table.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also some contracts (EOS) implement these methods but with wrong return type,
|
||||
contracts are missing this data. Also, some contracts (EOS) implement these methods but with the wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
@@ -12,4 +12,4 @@ because numeric types there can't handle 32-byte integers. You should use
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- The contracts that don't implement the `decimals()` function but have the
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
|
||||
@@ -7,5 +7,4 @@
|
||||
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
|
||||
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
|
||||
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
|
||||
- [Ivan on Tech overviews crypto datasets in BigQuery](https://youtu.be/2IkJBNhsXNY?t=239)
|
||||
- [Unlocking the Power of Google BigQuery (Cloud Next '19)](https://youtu.be/KL_i5XZIaJg?t=131)
|
||||
|
||||
@@ -153,7 +153,7 @@ trace_id | string |
|
||||
|
||||
### Differences between geth and parity traces.csv
|
||||
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is same as `to_address` of parent call);
|
||||
- the `to_address` field differs for `callcode` traces (geth seems to return the correct value, since the parity value of `to_address` is the same as the `to_address` of the parent call);
|
||||
- geth output doesn't have `reward` traces;
|
||||
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
|
||||
- the `error` field contains a human-readable error message, which might differ between geth and parity output;
|
||||
|
||||
@@ -48,7 +48,7 @@ from ethereumetl.cli.stream import stream
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(version='2.4.0')
|
||||
@click.version_option(version='2.4.2')
|
||||
@click.pass_context
|
||||
def cli(ctx):
|
||||
pass
|
||||
|
||||
@@ -44,7 +44,7 @@ class BaseItemExporter(object):
|
||||
self._configure(kwargs)
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
"""Configure the exporter by poping options from the ``options`` dict.
|
||||
"""Configure the exporter by popping options from the ``options`` dict.
|
||||
If dont_fail is set, it won't raise an exception on unexpected options
|
||||
(useful for using with keyword arguments in subclasses constructors)
|
||||
"""
|
||||
|
||||
@@ -15,7 +15,7 @@ def get_origin_ipfs_client():
|
||||
|
||||
|
||||
# Parses the shop's HTML index page to extract the name of the IPFS directory under
|
||||
# which all the shops data is located.
|
||||
# which all the shop data is located.
|
||||
def _get_shop_data_dir(shop_index_page):
|
||||
match = re.search('<link rel="data-dir" href="(.+?)"', shop_index_page)
|
||||
return match.group(1) if match else None
|
||||
|
||||
@@ -95,7 +95,7 @@ class ExportOriginJob(BaseJob):
|
||||
})
|
||||
|
||||
for batch in batches:
|
||||
# https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
|
||||
# https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
|
||||
filter_params = {
|
||||
'address': batch['contract_address'],
|
||||
'fromBlock': batch['from_block'],
|
||||
|
||||
@@ -65,7 +65,7 @@ class ExportTokenTransfersJob(BaseJob):
|
||||
|
||||
def _export_batch(self, block_number_batch):
|
||||
assert len(block_number_batch) > 0
|
||||
# https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
|
||||
# https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
|
||||
filter_params = {
|
||||
'fromBlock': block_number_batch[0],
|
||||
'toBlock': block_number_batch[-1],
|
||||
|
||||
@@ -54,7 +54,7 @@ class EthContractService:
|
||||
c.implements('allowance(address,address)')
|
||||
|
||||
# https://github.com/ethereum/EIPs/blob/master/EIPS/eip-721.md
|
||||
# https://github.com/OpenZeppelin/openzeppelin-solidity/blob/master/contracts/token/ERC721/ERC721Basic.sol
|
||||
# https://github.com/OpenZeppelin/openzeppelin-contracts/blob/master/contracts/token/ERC721/ERC721.sol
|
||||
# Doesn't check the below ERC721 methods to match CryptoKitties contract
|
||||
# getApproved(uint256)
|
||||
# setApprovalForAll(address,bool)
|
||||
|
||||
@@ -62,8 +62,12 @@ def create_item_exporter(output):
|
||||
from blockchainetl.jobs.exporters.converters.unix_timestamp_item_converter import UnixTimestampItemConverter
|
||||
from blockchainetl.jobs.exporters.converters.int_to_decimal_item_converter import IntToDecimalItemConverter
|
||||
from blockchainetl.jobs.exporters.converters.list_field_item_converter import ListFieldItemConverter
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
from ethereumetl.streaming.postgres_tables import BLOCKS, TRANSACTIONS, LOGS, TOKEN_TRANSFERS, TRACES, TOKENS, CONTRACTS
|
||||
|
||||
def array_to_str(val):
|
||||
return ','.join(val) if val is not None else None
|
||||
|
||||
item_exporter = PostgresItemExporter(
|
||||
output, item_type_to_insert_stmt_mapping={
|
||||
'block': create_insert_statement_for_table(BLOCKS),
|
||||
@@ -74,8 +78,12 @@ def create_item_exporter(output):
|
||||
'token': create_insert_statement_for_table(TOKENS),
|
||||
'contract': create_insert_statement_for_table(CONTRACTS),
|
||||
},
|
||||
converters=[UnixTimestampItemConverter(), IntToDecimalItemConverter(),
|
||||
ListFieldItemConverter('topics', 'topic', fill=4)])
|
||||
converters=[
|
||||
UnixTimestampItemConverter(),
|
||||
IntToDecimalItemConverter(),
|
||||
ListFieldItemConverter('topics', 'topic', fill=4),
|
||||
SimpleItemConverter(field_converters={'blob_versioned_hashes': array_to_str})
|
||||
])
|
||||
elif item_exporter_type == ItemExporterType.GCS:
|
||||
from blockchainetl.jobs.exporters.gcs_item_exporter import GcsItemExporter
|
||||
bucket, path = get_bucket_and_path_from_gcs_output(output)
|
||||
|
||||
@@ -49,6 +49,9 @@ BLOCKS = Table(
|
||||
Column('gas_used', BigInteger),
|
||||
Column('transaction_count', BigInteger),
|
||||
Column('base_fee_per_gas', BigInteger),
|
||||
Column('withdrawals_root', String),
|
||||
Column('blob_gas_used', BigInteger),
|
||||
Column('excess_blob_gas', BigInteger),
|
||||
)
|
||||
|
||||
TRANSACTIONS = Table(
|
||||
@@ -78,6 +81,10 @@ TRANSACTIONS = Table(
|
||||
Column('receipt_l1_gas_used', BigInteger),
|
||||
Column('receipt_l1_gas_price', BigInteger),
|
||||
Column('receipt_l1_fee_scalar', Float),
|
||||
Column('max_fee_per_blob_gas', BigInteger),
|
||||
Column('blob_versioned_hashes', String),
|
||||
Column('receipt_blob_gas_price', BigInteger),
|
||||
Column('receipt_blob_gas_used', BigInteger),
|
||||
)
|
||||
|
||||
LOGS = Table(
|
||||
|
||||
2
setup.py
2
setup.py
@@ -11,7 +11,7 @@ long_description = read('README.md') if os.path.isfile("README.md") else ""
|
||||
|
||||
setup(
|
||||
name='ethereum-etl',
|
||||
version='2.4.0',
|
||||
version='2.4.2',
|
||||
author='Evgeny Medvedev',
|
||||
author_email='evge.medvedev@gmail.com',
|
||||
description='Tools for exporting Ethereum blockchain data to CSV or JSON',
|
||||
|
||||
Reference in New Issue
Block a user