mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-11 14:48:11 -05:00
Compare commits
44 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbd57fc079 | ||
|
|
8204c0827d | ||
|
|
46b91a9ff2 | ||
|
|
b5fd64bdca | ||
|
|
d8547e9c7c | ||
|
|
7ef53859c1 | ||
|
|
e38d1c1f2f | ||
|
|
43fe6b49b3 | ||
|
|
db274c8a85 | ||
|
|
69247042a4 | ||
|
|
218e1e4356 | ||
|
|
5e0fc8cc75 | ||
|
|
77efda5106 | ||
|
|
ece0b7f422 | ||
|
|
b31b76a73a | ||
|
|
0cb7eb60b5 | ||
|
|
02943f7caf | ||
|
|
b844b95868 | ||
|
|
4d305a284f | ||
|
|
e161e6ef13 | ||
|
|
9b917b8ddd | ||
|
|
383caf8331 | ||
|
|
c61e91235f | ||
|
|
0e4b4a894b | ||
|
|
d58c1ebda7 | ||
|
|
f0bf07e60c | ||
|
|
efe7acdc13 | ||
|
|
20404eca9e | ||
|
|
435cbe0a74 | ||
|
|
b606e22cd5 | ||
|
|
4943b0b795 | ||
|
|
eed2068def | ||
|
|
313b4b1237 | ||
|
|
ad6149155e | ||
|
|
c55c0f68dc | ||
|
|
b031b04bc7 | ||
|
|
b314f1ed0c | ||
|
|
61eb2e6e21 | ||
|
|
9f62e7ecea | ||
|
|
4da7e7b23f | ||
|
|
de72ba3511 | ||
|
|
3aabf9aa54 | ||
|
|
284755bafc | ||
|
|
23133594e8 |
4
LICENSE
4
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
Copyright (c) 2018-2025 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
12
README.md
12
README.md
@@ -1,9 +1,9 @@
|
||||
# Ethereum ETL
|
||||
|
||||
[](https://travis-ci.com/github/blockchain-etl/ethereum-etl)
|
||||
[](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://github.com/blockchain-etl/ethereum-etl/blob/develop/LICENSE)
|
||||
[](https://t.me/BlockchainETL)
|
||||
[](https://discord.gg/tRKG7zGKtF)
|
||||
[](https://x.com/EthereumETL)
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
@@ -27,7 +27,7 @@ Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](
|
||||
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md##export_token_transfers)):
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md#export_token_transfers)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
@@ -78,7 +78,7 @@ For the latest version, check out the repo and call
|
||||
```bash
|
||||
> pip3 install -e .[dev,streaming]
|
||||
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
|
||||
> export PROVIDER_URL=<your_porvider_uri>
|
||||
> export PROVIDER_URL=<your_provider_uri>
|
||||
> pytest -vv
|
||||
```
|
||||
|
||||
@@ -109,9 +109,9 @@ For the latest version, check out the repo and call
|
||||
> echo "Stream to console"
|
||||
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
|
||||
> echo "Stream to Pub/Sub"
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your_project>/topics/crypto_ethereum
|
||||
|
||||
If running on Apple M1 chip add the `--platform linux/x86_64` option to the `build` and `run` commands e.g.:
|
||||
If running on an Apple M1 chip, add the `--platform linux/x86_64` option to the `build` and `run` commands, e.g.:
|
||||
|
||||
```
|
||||
docker build --platform linux/x86_64 -t ethereum-etl:latest .
|
||||
|
||||
@@ -45,7 +45,7 @@ class BaseItemExporter(object):
|
||||
self._configure(kwargs)
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
"""Configure the exporter by poping options from the ``options`` dict.
|
||||
"""Configure the exporter by popping options from the ``options`` dict.
|
||||
If dont_fail is set, it won't raise an exception on unexpected options
|
||||
(useful for using with keyword arguments in subclasses constructors)
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# Contact
|
||||
|
||||
- [D5 Discord Server](https://discord.gg/wukrezR)
|
||||
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
## Exporting the Blockchain
|
||||
|
||||
If you'd like to have blockchain data set up and hosted for you, [get in touch with us at D5](https://d5.ai/?ref=ethereumetl).
|
||||
|
||||
1. Install Python 3.5.3+: [https://www.python.org/downloads/](https://www.python.org/downloads/)
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Google BiqQuery
|
||||
# Google BigQuery
|
||||
|
||||
## Querying in BigQuery
|
||||
|
||||
@@ -16,4 +16,4 @@ Read [this article](https://medium.com/google-cloud/building-token-recommender-i
|
||||
|
||||
### Awesome BigQuery Views
|
||||
|
||||
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)
|
||||
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
With 1,700+ likes on GitHub, Ethereum ETL is the most popular open source project for Ethereum data.
|
||||
With 1,700+ stars on GitHub, Ethereum ETL is the most popular open-source project for Ethereum data.
|
||||
|
||||
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
|
||||
|
||||
@@ -17,8 +17,31 @@ Easily export:
|
||||
* Receipts
|
||||
* Logs
|
||||
* Contracts
|
||||
* Internal transactions
|
||||
* Internal transactions (traces)
|
||||
|
||||
## Advanced Features
|
||||
|
||||
* Stream blockchain data to Pub/Sub, Postgres, or other destinations in real-time
|
||||
* Filter and transform data using flexible command-line options
|
||||
* Support for multiple Ethereum node providers (Geth, Parity, Infura, etc.)
|
||||
* Handles chain reorganizations through configurable lag
|
||||
* Export data by block range or by date
|
||||
* Scalable architecture with configurable batch sizes and worker counts
|
||||
|
||||
## Use Cases
|
||||
|
||||
* Data analysis and visualization
|
||||
* Machine learning on blockchain data
|
||||
* Building analytics dashboards
|
||||
* Market research and token analysis
|
||||
* Compliance and audit reporting
|
||||
* Academic research on blockchain economics
|
||||
|
||||
## Projects using Ethereum ETL
|
||||
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
|
||||
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum
|
||||
* [Ethereum Blockchain ETL on GCP](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics) - Official Google Cloud reference architecture
|
||||
|
||||
## Getting Started
|
||||
|
||||
Check the [Quickstart](quickstart.md) guide to begin using Ethereum ETL or explore the [Commands](commands.md) page for detailed usage instructions.
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing in the `tokens`
|
||||
table.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also some contracts (EOS) implement these methods but with wrong return type,
|
||||
contracts are missing this data. Also, some contracts (EOS) implement these methods but with the wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
@@ -12,4 +12,4 @@ because numeric types there can't handle 32-byte integers. You should use
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- The contracts that don't implement the `decimals()` function but have the
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
|
||||
@@ -7,5 +7,4 @@
|
||||
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
|
||||
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
|
||||
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
|
||||
- [Ivan on Tech overviews crypto datasets in BigQuery](https://youtu.be/2IkJBNhsXNY?t=239)
|
||||
- [Unlocking the Power of Google BigQuery (Cloud Next '19)](https://youtu.be/KL_i5XZIaJg?t=131)
|
||||
|
||||
@@ -153,7 +153,7 @@ trace_id | string |
|
||||
|
||||
### Differences between geth and parity traces.csv
|
||||
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is same as `to_address` of parent call);
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return the correct value, as the parity value of `to_address` is the same as the `to_address` of the parent call);
|
||||
- geth output doesn't have `reward` traces;
|
||||
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
|
||||
- `error` field contains a human-readable error message, which might differ in geth/parity output;
|
||||
|
||||
@@ -44,7 +44,7 @@ class BaseItemExporter(object):
|
||||
self._configure(kwargs)
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
"""Configure the exporter by poping options from the ``options`` dict.
|
||||
"""Configure the exporter by popping options from the ``options`` dict.
|
||||
If dont_fail is set, it won't raise an exception on unexpected options
|
||||
(useful for using with keyword arguments in subclasses constructors)
|
||||
"""
|
||||
|
||||
@@ -15,7 +15,7 @@ def get_origin_ipfs_client():
|
||||
|
||||
|
||||
# Parses the shop's HTML index page to extract the name of the IPFS directory under
|
||||
# which all the shops data is located.
|
||||
# which all the shop data is located.
|
||||
def _get_shop_data_dir(shop_index_page):
|
||||
match = re.search('<link rel="data-dir" href="(.+?)"', shop_index_page)
|
||||
return match.group(1) if match else None
|
||||
|
||||
@@ -95,7 +95,7 @@ class ExportOriginJob(BaseJob):
|
||||
})
|
||||
|
||||
for batch in batches:
|
||||
# https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
|
||||
# https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
|
||||
filter_params = {
|
||||
'address': batch['contract_address'],
|
||||
'fromBlock': batch['from_block'],
|
||||
|
||||
@@ -65,7 +65,7 @@ class ExportTokenTransfersJob(BaseJob):
|
||||
|
||||
def _export_batch(self, block_number_batch):
|
||||
assert len(block_number_batch) > 0
|
||||
# https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
|
||||
# https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
|
||||
filter_params = {
|
||||
'fromBlock': block_number_batch[0],
|
||||
'toBlock': block_number_batch[-1],
|
||||
|
||||
@@ -54,7 +54,7 @@ class EthContractService:
|
||||
c.implements('allowance(address,address)')
|
||||
|
||||
# https://github.com/ethereum/EIPs/blob/master/EIPS/eip-721.md
|
||||
# https://github.com/OpenZeppelin/openzeppelin-solidity/blob/master/contracts/token/ERC721/ERC721Basic.sol
|
||||
# https://github.com/OpenZeppelin/openzeppelin-contracts/blob/master/contracts/token/ERC721/ERC721.sol
|
||||
# Doesn't check the below ERC721 methods to match CryptoKitties contract
|
||||
# getApproved(uint256)
|
||||
# setApprovalForAll(address,bool)
|
||||
|
||||
Reference in New Issue
Block a user