Compare commits

...

320 Commits

Author SHA1 Message Date
Evgeny Medvedev
fbd57fc079 Merge pull request #523 from GarmashAlex/fix/broken-link
docs: fix broken link
2025-08-27 09:07:52 +07:00
GarmashAlex
8204c0827d docs: fix broken link 2025-08-26 19:09:29 +03:00
Evgeny Medvedev
46b91a9ff2 Merge pull request #522 from mdqst/patch-1
docs: fix broken video link
2025-08-22 20:49:33 +07:00
Dmitry
b5fd64bdca docs: fix broken video link 2025-08-22 14:50:56 +03:00
Evgeny Medvedev
d8547e9c7c Merge pull request #521 from Galoretka/fix/broken-link
fix(docs): update Ethereum JSON-RPC links in ETL jobs
2025-08-21 14:54:21 +07:00
Galoretka
7ef53859c1 fix:broken link 2025-08-21 10:51:51 +03:00
Galoretka
e38d1c1f2f fix: broken link 2025-08-21 10:51:22 +03:00
Evgeny Medvedev
43fe6b49b3 Merge pull request #519 from blockchain-etl/medvedev1088-patch-1
Remove gitter link in README.md
2025-04-30 15:26:44 +07:00
Evgeny Medvedev
db274c8a85 Update README.md 2025-04-30 15:24:40 +07:00
Evgeny Medvedev
69247042a4 Merge pull request #518 from oksanaphmn/patch-1
docs: add license badge
2025-04-30 15:23:15 +07:00
oksanaphmn
218e1e4356 Update README.md 2025-04-27 13:16:05 +03:00
Evgeny Medvedev
5e0fc8cc75 Merge pull request #516 from gap-editor/develop
deleted link to discord from 'contact.md'
2025-04-05 09:13:55 +07:00
Maximilian Hubert
77efda5106 Update contact.md 2025-04-04 20:35:34 +02:00
Evgeny Medvedev
ece0b7f422 Merge pull request #515 from VolodymyrBg/bg
docs: extension of documentation in index.md with the addition of adv…
2025-04-04 21:34:37 +07:00
VolodymyrBg
b31b76a73a Update index.md 2025-04-04 17:33:03 +03:00
VolodymyrBg
0cb7eb60b5 docs: extension of documentation in index.md with the addition of advanced features and new projects 2025-04-02 20:02:14 +03:00
Evgeny Medvedev
02943f7caf Merge pull request #514 from blockchain-etl/medvedev1088-patch-1
Update exporting-the-blockchain.md
2025-04-01 09:23:59 +07:00
Evgeny Medvedev
b844b95868 Update exporting-the-blockchain.md 2025-04-01 09:22:53 +07:00
Evgeny Medvedev
4d305a284f Merge pull request #513 from Hopium21/patch-1
remove broken link to D5.ai
2025-04-01 09:22:15 +07:00
Hopium
e161e6ef13 Update exporting-the-blockchain.md 2025-03-31 20:28:23 +02:00
Evgeny Medvedev
9b917b8ddd Update README.md 2025-03-04 19:15:39 +07:00
Evgeny Medvedev
383caf8331 Merge pull request #511 from Radovenchyk/patch-2
docs: removed the discord link
2025-03-04 19:13:25 +07:00
Radovenchyk
c61e91235f Update README.md 2025-03-04 11:36:05 +02:00
Evgeny Medvedev
0e4b4a894b Merge pull request #510 from Radovenchyk/patch-1
docs: added shield and twitter link
2025-03-03 21:50:15 +07:00
Radovenchyk
d58c1ebda7 Update README.md 2025-03-03 16:37:36 +02:00
Evgeny Medvedev
f0bf07e60c Merge pull request #509 from maximevtush/patch-1
Update LICENSE
2025-01-30 18:16:00 +08:00
Maxim Evtush
efe7acdc13 Update LICENSE 2025-01-30 11:07:04 +01:00
Evgeny Medvedev
20404eca9e Merge pull request #506 from romashka-btc/code/fix
typos/fix
2024-12-19 11:33:47 +08:00
Romashka
435cbe0a74 typo-Update exporters.py 2024-12-18 20:36:43 +02:00
Romashka
b606e22cd5 typo-Update exporters.py 2024-12-18 20:36:18 +02:00
Evgeny Medvedev
4943b0b795 Merge pull request #505 from XxAlex74xX/patch-1
typo README.md
2024-12-18 15:51:25 +08:00
XxAlex74xX
eed2068def Update README.md 2024-12-18 07:38:38 +01:00
Evgeny Medvedev
313b4b1237 Merge pull request #503 from Guayaba221/develop
docs fix spelling issues
2024-12-15 21:00:53 +08:00
planetBoy
ad6149155e Update exporting-the-blockchain.md 2024-12-15 10:11:54 +01:00
Evgeny Medvedev
c55c0f68dc Merge pull request #502 from futreall/develop
Fix significant typo in documentation
2024-12-15 11:43:33 +08:00
futreall
b031b04bc7 Update google-bigquery.md 2024-12-14 20:40:47 +02:00
Evgeny Medvedev
b314f1ed0c Merge pull request #501 from vtjl10/develop
fix: typos in documentation files
2024-12-15 00:21:31 +08:00
fuder.eth
61eb2e6e21 Update README.md 2024-12-14 13:27:01 +01:00
Evgeny Medvedev
9f62e7ecea Merge pull request #492 from nnsW3/docs-improvement
Docs improvement
2024-06-26 09:41:09 +08:00
Elias Rad
4da7e7b23f fix README.md 2024-06-25 20:06:41 +03:00
Elias Rad
de72ba3511 fix origin.py 2024-06-25 20:04:51 +03:00
Elias Rad
3aabf9aa54 fix schema.md 2024-06-25 20:02:55 +03:00
Elias Rad
284755bafc fix limitations.md 2024-06-25 20:02:26 +03:00
Elias Rad
23133594e8 fix index.md 2024-06-25 20:02:14 +03:00
evgeny
ca54ef6c4b Bump version 2024-04-11 19:42:39 +07:00
Evgeny Medvedev
836f30e198 Merge pull request #488 from blockchain-etl/add_dencun_fields_to_postgres_tables
Add Dencun fields to postgres_tables.py
2024-04-11 20:41:49 +08:00
evgeny
1c6508f15d Add Dencun fields to postgres_tables.py 2024-04-11 19:38:27 +07:00
Evgeny Medvedev
a4d6f8fcb1 Merge pull request #487 from blockchain-etl/add_readthedocs_yaml
Add .readthedocs.yaml
2024-04-11 10:58:07 +08:00
evgeny
bc79d7d9bf Add .readthedocs.yaml 2024-04-11 09:56:49 +07:00
medvedev1088
7fdcf0f7b7 Bump version 2024-04-03 12:42:38 +08:00
medvedev1088
d3330f7ddc Bump version 2024-04-03 12:21:40 +08:00
Evgeny Medvedev
1066ec9025 Merge pull request #484 from blockchain-etl/dencun_upgrade
Add EIP-4844 (Dencun) columns
2024-04-03 12:20:11 +08:00
medvedev1088
2a92ecbf31 Revert column width in schema.md 2024-03-29 14:38:23 +08:00
medvedev1088
c238e8b57b Add withdrawals_root and withdrawals to schema.md 2024-03-29 14:17:31 +08:00
medvedev1088
a27d2427e1 Trigger build 2024-03-29 13:47:54 +08:00
medvedev1088
c18f78506c Add tests for Dencun transactions 2024-03-29 13:37:18 +08:00
medvedev1088
23bad940db Fix slow tests 2024-03-29 13:11:35 +08:00
medvedev1088
0a52db4b8a Fix tests 2024-03-29 13:06:02 +08:00
medvedev1088
9fd1f906f2 Add EIP-4844 fields to blocks, fix missing comma, updated enrich.py, update docs 2024-03-29 12:44:44 +08:00
Evgeny Medvedev
f08f93ddfe Merge pull request #474 from haiyanghe/develop
Dencun transaction and transaction receipt fields
2024-03-29 11:55:58 +08:00
Evgeny Medvedev
9e51c3b8d4 Merge pull request #478 from blockchain-etl/delete_funding_yaml
Delete FUNDING.yml as the link is broken
2024-03-12 12:24:00 +08:00
medvedev1088
79d341ea45 Delete FUNDING.yml as the link is broken 2024-03-12 12:23:27 +08:00
haiyanghe
9db1ff104a Dencun transaction and transaction receipt fields 2024-02-07 20:19:49 +00:00
medvedev1088
952a49ba4b Bump version 2023-08-25 17:14:23 +07:00
Allen Day
aab122ebf3 Merge pull request #456 from sfsf9797/fix-datatype
Fix datatype
2023-08-23 14:02:50 +08:00
sfsf9797
438c9af751 fix error msg 2023-08-18 00:41:16 +08:00
sfsf9797
3ec2af25e1 fix 2023-08-18 00:40:18 +08:00
medvedev1088
84101407c1 Bump version 2023-08-02 20:17:29 +08:00
Evgeny Medvedev
97a0275ced Merge pull request #446 from MSilb7/develop
Add Optimism / OP Stack Transaction Receipt Fields
2023-08-02 20:11:52 +08:00
Michael Silberling
7cbfd0e533 Update receipts.sql
fee scalar to decimal
2023-07-06 17:19:27 -04:00
Michael Silberling
94ebd3f3e9 mod scalar to be a float 2023-06-02 16:33:51 -04:00
Michael Silberling
c0fd158211 add to streaming 2023-06-01 15:22:59 -04:00
Michael Silberling
7529c43f4e update 2023-06-01 11:59:55 -04:00
Michael Silberling
ce906f0af1 Merge branch 'blockchain-etl:develop' into develop 2023-06-01 11:54:06 -04:00
Michael Silberling
eaf4bf0bf2 add tests 2023-06-01 11:53:33 -04:00
Evgeny Medvedev
1a0a8cf0f8 Merge pull request #449 from vinhloc30796/fix/minor-error-msg
Message: "start_timestamp must be less than end_timestamp"
2023-06-01 16:08:43 +08:00
Loc Nguyen
f0e4302423 Fix again 2023-06-01 14:36:20 +07:00
Loc Nguyen
fb35431aa7 start_timestamp must be lesser or equal to end_timestamp 2023-06-01 14:13:57 +07:00
Evgeny Medvedev
87b1669434 Merge pull request #447 from blockchain-etl/fix_build_wrong_ssl_version
Restrict urllib3 v2 as it breaks the build
2023-05-29 18:00:46 +08:00
medvedev1088
9678bb91c7 Add version restriction for urllib3 as it breaks Travis build 2023-05-29 17:55:14 +08:00
medvedev1088
f4e2b57463 Install latest libssl-dev to fix build error in Travis CI 2023-05-29 17:36:05 +08:00
Michael Silberling
6599a438a0 add to sql 2023-05-26 13:05:22 -04:00
Michael Silberling
f8a5f25376 rm comma 2023-05-25 18:15:21 -04:00
Michael Silberling
de96e394ee rm l1 gas paid 2023-05-25 18:02:38 -04:00
Michael Silberling
a58fe4585d Revert "Update README.md"
This reverts commit aae968cd4b.
2023-05-25 18:01:25 -04:00
Michael Silberling
f8878ff320 Revert "Update README.md"
This reverts commit 84518f70ae.
2023-05-25 18:01:22 -04:00
Michael Silberling
993ebe67c8 Revert "Update README.md"
This reverts commit af2ef17832.
2023-05-25 18:01:12 -04:00
Michael Silberling
f967d73a95 Revert "Update README.md"
This reverts commit e8b0447a63.
2023-05-25 18:01:06 -04:00
Michael Silberling
e8b0447a63 Update README.md 2023-05-19 15:29:48 -04:00
Michael Silberling
af2ef17832 Update README.md 2023-05-19 15:29:39 -04:00
Michael Silberling
161aa6e472 add error check for l1_gas_used_paid 2023-05-19 15:26:17 -04:00
Michael Silberling
7c80c09500 Merge branch 'develop' of https://github.com/MSilb7/optimism-etl into develop 2023-05-19 15:19:09 -04:00
Michael Silberling
3affbadac3 comma 2023-05-19 15:19:01 -04:00
Michael Silberling
84518f70ae Update README.md 2023-05-19 15:14:37 -04:00
Michael Silberling
aae968cd4b Update README.md 2023-05-19 15:14:09 -04:00
Michael Silberling
6f44daf023 add l1 fields 2023-05-19 15:12:47 -04:00
TimNooren
2da9d050f4 Relax Click requirement (#444)
* Update post-shanghai test cases

Cases were based on Goerli while awaiting the mainnet upgrade.

* Relax Click requirement
2023-05-02 20:54:10 +08:00
TimNooren
2939c0afbf Update Github Actions runner image (#435) 2023-04-04 20:11:33 +08:00
TimNooren
2678a2a2e3 Add withdrawals (EIP-4895) (#434)
* Update IPFS gateway

* Add format parameter to test_export_blocks_job

* Add withdrawals field to block model

* Bump package version
2023-04-04 19:15:43 +08:00
Evgeny Medvedev
d801da96dd Merge pull request #432 from blockchain-etl/update_docs_nansen_link
Update link to Nansen Query in docs
2023-03-17 15:05:26 +07:00
medvedev1088
b876f2059e Update link to Nansen Query in docs 2023-03-17 16:02:18 +08:00
Evgeny Medvedev
204bcb65f6 Merge pull request #431 from blockchain-etl/update_readme_nansen_link
Update link to Nansen Query
2023-03-17 14:59:35 +07:00
medvedev1088
92c07982c4 Update link to Nansen Query 2023-03-17 15:55:53 +08:00
Maxim Razhev
b6dbf07dbf Bump package version (#420) 2022-12-09 18:11:14 +07:00
sleepy-tiger
f0732961f5 Upgrade eth-abi lower bound to >=2.2.0 (#419) 2022-12-09 18:03:32 +07:00
medvedev1088
8498a775da Update link to Telegram group 2022-10-14 18:33:28 +08:00
medvedev1088
f0e98871a2 Bump version 2022-10-14 18:21:00 +08:00
Evgeny Medvedev
f7f192510b Merge pull request #302 from blockjoe/develop
Fallback to `web3.eth.getLogs` when calling to nodes without `eth_newFilter`
2022-10-14 17:19:57 +07:00
Evgeny Medvedev
b1acfa3be7 Merge pull request #343 from sfsf9797/set_csv_limit
fix for #306 'field larger than field limit' error
2022-10-14 17:17:03 +07:00
Evgeny Medvedev
372bf2cb16 Merge pull request #365 from ezioruan/patch-1
Update schema.md
2022-10-14 17:16:20 +07:00
Evgeny Medvedev
45a089fe0c Merge pull request #383 from blockchain-etl/update_docs
Add --txlookuplimit 0 in commands.md
2022-10-14 17:15:08 +07:00
Evgeny Medvedev
688ecdfa3f Merge pull request #399 from m0ssc0de/develop
set max field size limit for export token transfers
2022-10-14 17:14:17 +07:00
medvedev1088
0f6234ade3 Bump version 2022-10-14 14:22:22 +08:00
Evgeny Medvedev
47308f4891 Merge pull request #359 from CoinStatsHQ/pr/aws-kinesis-support
Added support for AWS Kinesis
2022-10-14 13:19:43 +07:00
Moss
2c91a31061 set max field size limit for export token transfers 2022-10-02 22:56:57 +00:00
Evgeny Medvedev
956695b77b Merge pull request #396 from blockchain-intel/kafka-export-rm-print
Change print(data) in Kafka exporter to instead log at the debug level
2022-09-26 22:25:48 +07:00
Evgeny Medvedev
533f516296 Merge pull request #395 from blockchain-etl/feat/remove-quickfix-traces
Remove TempFix for insufficient funds since this has been resolved on the node
2022-09-23 11:23:03 +07:00
Max Cruz
d34b28e4bf rm print data in kafka exporter 2022-09-22 10:39:23 -04:00
Akshay
3ed8b8bc3e remove traces quickfix 2022-09-21 17:48:22 +08:00
medvedev1088
e1f658bc36 Bump version 2022-09-16 03:06:22 +08:00
Evgeny Medvedev
aae2edb20b Merge pull request #393 from blockchain-etl/bump_pubsub_version
Bump google-cloud-pubsub version
2022-09-16 02:05:29 +07:00
medvedev1088
12851c17a5 Bump google-cloud-pubsub version 2022-09-16 03:02:38 +08:00
medvedev1088
f5115547a3 Bump version 2022-09-16 00:27:05 +08:00
Evgeny Medvedev
58f5d9020c Merge pull request #392 from blockchain-etl/path_insufficient_funds_error_on_erigon
Temporary fix for the insufficient funds error when tracing a block
2022-09-15 23:26:21 +07:00
medvedev1088
f5fa89a916 Temporary fix for the insufficient funds error https://github.com/ledgerwatch/erigon/issues/5284 2022-09-16 00:05:49 +08:00
medvedev1088
262e5f65f1 Bump version 2022-08-16 22:27:40 +08:00
Evgeny Medvedev
6b64c2338b Merge pull request #371 from FeSens/bugfix/contract-logic-error
Bugfix: Ignore ContractLogicError raised by Web3.py
2022-08-16 21:26:24 +07:00
Evgeny Medvedev
be64a901ab Merge pull request #372 from yongchand/develop
Fix typo in export receipts and logs
2022-08-15 14:43:47 +07:00
medvedev1088
97e2749f2a Add --txlookuplimit 0 in commands.md 2022-08-13 16:50:06 +08:00
Evgeny Medvedev
ca9eb6696b Merge pull request #382 from blockchain-etl/update_docs
Add sudo apt-get install python-dev in .travis.yml
2022-08-12 19:55:22 +07:00
medvedev1088
6c3a0694a3 Lock grpcio version 2022-08-12 20:50:50 +08:00
medvedev1088
837c324448 Add sudo apt-get install python-dev in .travis.yml 2022-08-12 20:42:31 +08:00
medvedev1088
7ef53acee0 Remove unused file 2022-08-12 20:08:05 +08:00
Evgeny Medvedev
119a54fca1 Merge pull request #379 from blockchain-etl/update_docs
Remove --ipcapi debug from geth as it was deprecated
2022-08-11 12:12:43 +07:00
medvedev1088
cb0f955c27 Update Discord invite 2022-08-11 13:11:21 +08:00
medvedev1088
9725ff9122 Remove --ipcapi debug from geth as it was deprecated 2022-08-10 20:41:49 +08:00
yongchand
a142542ef9 Fix typo in export receipts and logs 2022-07-28 13:22:26 +09:00
FeSens
342c5df3bb ignore ContractLogicError 2022-07-25 19:15:22 -03:00
ezio ruan
d189e7a344 Update schema.md
add block_number to token schema
2022-07-08 17:48:01 +08:00
Evgeny Medvedev
f8f22f93a1 Merge pull request #358 from CoinStatsHQ/pr/aws-schemas-updated
AWS Athena schemas update to JSON
2022-06-19 01:56:01 +07:00
Anton Bryzgalov @ CoinStats
f4403a7e3f Added support for AWS Kinesis
Sponsored by CoinStats.app
2022-06-17 22:21:08 +04:00
Anton Bryzgalov @ CoinStats
4ee070627c schemas/aws: removed debug queries
Sponsored by CoinStats.app
2022-06-10 14:36:42 +04:00
Anton Bryzgalov @ CoinStats
7a337e724a schemas: aws_partition_by_date -> aws
Sponsored by CoinStats.app
2022-06-10 13:56:43 +04:00
Anton Bryzgalov @ CoinStats
ac812a0f36 schemas/aws_partition_by_date: schemas updated to JSON format
Sponsored by CoinStats.app
2022-06-10 13:56:02 +04:00
medvedev1088
1711d2e809 Bump version 2022-05-24 15:45:34 +08:00
Evgeny Medvedev
d251f21b04 Merge pull request #352 from ytrezq/patch-1
Transfer extractor : fix string compare bug by switching to case insensitive string comparison.
2022-05-24 15:43:47 +08:00
ytrezq
dcdc776c1b Fix string compare bug by switching to case insensitive string comparison.
Some nodes as a service don’t return the result in lowercase but use the ᴇɪᴘ-55 checksum format or in uppercase.
This results in some transfers being rejected whereas the topic match.
2022-05-23 21:06:25 +02:00
medvedev1088
59ddb23f45 Fix token addresses in commands.md 2022-05-15 22:37:01 +08:00
medvedev1088
64adeb77a8 Bump version 2022-05-09 00:49:56 +08:00
Evgeny Medvedev
caff3065f7 Merge pull request #346 from blockchain-etl/bump_pg8000_version
Bump pg8000 version
2022-05-09 00:48:41 +08:00
medvedev1088
d5567bf343 Bump pg8000 version to fix https://github.com/blockchain-etl/ethereum-etl/issues/345 2022-05-09 00:36:00 +08:00
medvedev1088
26e940224b Add notes about Apple M1 chip to README 2022-05-06 19:43:06 +08:00
medvedev1088
5efa6e0eb9 Bump version 2022-05-06 19:34:32 +08:00
Evgeny Medvedev
53c1b59c84 Merge pull request #339 from dbfreem/develop
web3 upgrade
2022-05-06 19:30:29 +08:00
sfsf9797
8c9d6a62cc set max field size limit 2022-05-05 00:08:33 +08:00
medvedev1088
d085d5a5a4 Bump version 2022-05-04 16:57:45 +08:00
Evgeny Medvedev
43227e54b2 Merge pull request #316 from ninjascant/fix/update-click
Update click version; update package version
2022-05-04 16:56:27 +08:00
Maxim Razhev
00e63d2b83 Update version at cli 2022-05-04 13:40:44 +05:00
Maxim Razhev
d58e72974a Resolve conflicts 2022-04-28 17:41:54 +05:00
Maxim Razhev
817660199c Resolve conflicts 2022-04-28 17:37:39 +05:00
DB
50925fc94d 3.7.2 in travis 2022-04-24 13:24:58 -04:00
DB
e63e703390 testing travix 2022-04-24 13:19:15 -04:00
DB
8a87ba85e3 trying to setup travix to run in 3.7.2 2022-04-24 11:54:14 -04:00
DB
15ff2a2ecb set min version to 3.7.2 in setup.py 2022-04-24 11:51:13 -04:00
DB
e511dac818 travis remove 3.6 2022-04-19 05:19:17 -04:00
DB
64d16f581b upgraded web3,py and eth-abi
removed python 3.6
2022-04-17 22:02:48 -04:00
Evgeny Medvedev
898ce3f3bf Merge pull request #326 from alexleventer/docs-improvements
Docs improvements
2022-04-11 15:01:26 +08:00
Evgeny Medvedev
da6cc6f653 Merge pull request #325 from alexleventer/links
GitHub Edit Links
2022-04-11 15:00:15 +08:00
Alex Leventer
53c74e9996 Convert text to a link 2022-04-10 12:35:38 -07:00
Alex Leventer
67e27a6536 add missing comma 2022-04-10 12:34:28 -07:00
Alex Leventer
3a28eb116d Various, small docs improvements 2022-04-10 12:33:53 -07:00
Alex Leventer
b80eac42a6 add trailing slash 2022-04-10 12:28:48 -07:00
Alex Leventer
72dcfd4979 Fix broken edit links 2022-04-10 12:27:52 -07:00
medvedev1088
4bfa3e6ba4 Bump version 2022-04-01 21:20:41 +07:00
Evgeny Medvedev
1883a01e3f Merge pull request #293 from bsh98/develop
Adds contract and token support for PostgreSQL when streaming
2022-04-01 21:17:16 +07:00
ninjascant
1883e5cdac Fix/test mocks (#323)
* Fix import in eth service test

* Fix mocks and expected values in export blocks test: set proper tx type values

* Fix mocks and expected values in export receipts tests: set proper effectiveGasPrice values

* Fix mocks and expected values in stream tests
2022-03-13 19:22:08 +08:00
bsh98
8a49edcae3 Merge branch 'develop' into develop 2022-03-05 11:14:03 -08:00
Maxim Razhev
ce2ce23ccd Fix import in test 2022-02-22 18:49:55 +05:00
Maxim Razhev
d1189ad721 Update click version; update package version 2022-02-22 17:47:19 +05:00
medvedev1088
c135afc4bc Bump version 2022-02-11 23:16:06 +08:00
Evgeny Medvedev
65feed595a Merge pull request #313 from blockchain-etl/lib_version_upgrade
Limit python-dateutil major version in case of breaking changes
2022-02-11 22:15:00 +07:00
medvedev1088
e82a86ca7f Limit python-dateutil major version in case of breaking changes 2022-02-11 23:03:12 +08:00
Evgeny Medvedev
ed31940391 Merge pull request #311 from emlazzarin/develop
bump python-dateutil
2022-02-11 21:58:47 +07:00
Eddy Lazzarin
a0689730e4 bumpb python-dateutil 2022-02-10 18:12:38 -08:00
Evgeny Medvedev
0beebb139d Merge pull request #303 from blockchain-etl/fix_tests2
Lock version of libcst to fix build and tests
2022-01-20 19:13:38 +07:00
medvedev1088
5dea830c16 Move kafka dependency to extras 2022-01-20 20:01:13 +08:00
medvedev1088
37d89e9c9d Fix broken build 2022-01-20 19:55:37 +08:00
medvedev1088
baa79e74c9 Add pip install --upgrade pip to travis 2022-01-17 14:50:38 +08:00
blockjoe
db590188d1 Added client-side log filtering for calling to ETH clients that don't support eth_newFilter 2022-01-14 14:09:11 -05:00
medvedev1088
87f5e45d17 Update docs 2022-01-12 17:26:24 +08:00
medvedev1088
b772ec7fd7 Update error message for tracing 2022-01-12 14:25:10 +08:00
medvedev1088
69bb6f9bb3 Update error message for tracing 2022-01-12 14:24:25 +08:00
medvedev1088
2a9e468c1e Bump version 2022-01-07 03:54:24 +08:00
Evgeny Medvedev
be1892dffa Merge pull request #299 from blockchain-etl/poa_support
Add POA support
2022-01-07 02:52:32 +07:00
medvedev1088
31fb4efc48 Add POA support 2022-01-07 03:33:43 +08:00
Evgeny Medvedev
167b38b6bc Merge pull request #271 from numonedad/bugfix/poachain
adds support for non-mainnet in etl stream
2022-01-07 02:15:06 +07:00
Evgeny Medvedev
7d47dd34d6 Merge pull request #295 from blockchain-etl/fix_timeout_travisci
Fix travis ci timeout
2021-12-27 13:32:58 +07:00
medvedev1088
c6fbd10ef3 Fix travis ci timeout 2021-12-27 14:20:02 +08:00
medvedev1088
114cd60b5a Fix travis ci timeout 2021-12-27 14:10:40 +08:00
bsh98
1a0bac2e2c blknum and addr composite pk for contracts, tokens 2021-12-24 11:13:42 -08:00
Evgeny Medvedev
2a17fb67ad Merge pull request #294 from blockchain-etl/add_python39_to_tests
Add python 3.9 to tests
2021-12-24 22:17:54 +07:00
medvedev1088
dba7adf8f1 Add python 3.9 to tests 2021-12-24 18:07:44 +08:00
medvedev1088
75847dd6ba Bump version 2021-12-24 18:00:46 +08:00
medvedev1088
e3b83639c2 Update docs 2021-12-24 17:59:37 +08:00
Evgeny Medvedev
6bb0fffd38 Merge pull request #291 from ayush3298/develop
Added exporter for kafka
2021-12-24 16:55:49 +07:00
bsh98
b62a2f1b30 adds support for python3.6 2021-12-23 20:33:26 -08:00
bsh98
9d9c383ab8 tokens, contracts support for postgresql 2021-12-23 19:14:25 -08:00
bsh98
79ad41aad9 postgres support 2021-12-23 16:19:03 -08:00
Evgeny Medvedev
38c2c1beec Merge pull request #292 from blockchain-etl/fix_tests
Fix tests
2021-12-23 22:37:55 +07:00
medvedev1088
a582f73cd2 Remove Python 3.5 support 2021-12-23 20:55:56 +08:00
deq
257da16c48 Fixed file name typo and used exporters 2021-12-23 18:10:57 +05:30
medvedev1088
1b9c07862c Remove Python 3.5 support 2021-12-23 20:29:27 +08:00
medvedev1088
0667b68cb6 Fix tests 2021-12-23 20:23:34 +08:00
deq
28acabe45e Made kafka generic for output, now it can be in format of kafka/127.0.0.1:9092 2021-12-23 13:13:43 +05:30
deq
f593053af3 Added param helper for kafka 2021-12-22 18:58:58 +05:30
deq
8df7d901ee Resolved Conflicts 2021-12-22 13:02:02 +05:30
medvedev1088
a2b678167b Bump version 2021-12-20 01:45:45 +08:00
Evgeny Medvedev
c4c9207474 Merge pull request #290 from blockchain-etl/feature/pubsub_message_ordering
GCS exporter plus Pub/Sub message ordering
2021-12-20 00:43:36 +07:00
medvedev1088
289b9005a0 Update docs 2021-12-20 01:40:25 +08:00
medvedev1088
eefffb0aa6 Parameterize pubsub item exporter for batch params 2021-12-20 01:23:28 +08:00
medvedev1088
967c1ad37a Allow path in GCS item exporter 2021-12-20 01:22:26 +08:00
medvedev1088
b0408582db Fix output validation in stream command 2021-12-20 01:22:11 +08:00
medvedev1088
8f93376232 Merge branch 'develop' into feature/pubsub_message_ordering
# Conflicts:
#	docs/dockerhub.md
2021-12-20 01:07:22 +08:00
deq
de4380fb89 Added exporter for kafka 2021-12-17 17:45:05 +05:30
medvedev1088
e0ca8f9a8c Merge remote-tracking branch 'origin/develop' into develop 2021-11-26 14:25:26 +08:00
medvedev1088
589cb06ef0 Add note about states to docs 2021-11-26 14:25:21 +08:00
medvedev1088
54d9220130 Bump version 2021-11-13 00:49:25 +08:00
Evgeny Medvedev
c2f24c6d18 Merge pull request #283 from kunalmodi/export_contracts_param
Export Contracts: Fix cli args
2021-11-13 00:48:08 +08:00
Kunal Modi
fedf6e60a4 Export Contracts: Fix cli args 2021-11-12 07:03:24 -08:00
medvedev1088
629aed5bc8 Update link to Travis CI 2021-09-27 02:53:51 +08:00
Evgeny Medvedev
25fc768f39 Merge pull request #269 from psych0xpomp/eip1559_columns
Add EIP-1559 columns
2021-08-15 20:54:30 +07:00
Drew Wells
42b96bcf7b adds support for non-mainnet in etl stream
relates #178
2021-08-12 19:33:05 -05:00
psych0xpomp
cf80415fcf Add EIP-1559 columns
Enable streaming of EIP-1559 related columns to blocks and transactions tables.
2021-08-09 16:22:06 +10:00
medvedev1088
104576d5eb Bump version 2021-08-08 15:08:41 +07:00
Evgeny Medvedev
135a475d46 Merge pull request #268 from blockchain-etl/change_log_level_in_export_tokens
Change log level to debug in eth_token_service.py
2021-08-08 15:07:19 +07:00
medvedev1088
90afaabce6 Suppress warning Symbolic Execution not available: No module named 'mythril' 2021-08-08 14:40:42 +07:00
medvedev1088
55a9371b2b Change log level to debug in eth_token_service.py 2021-08-08 14:17:00 +07:00
medvedev1088
1a8ac0630f Bump version 2021-08-04 23:01:08 +07:00
Evgeny Medvedev
3d79a22370 Merge pull request #266 from blockchain-etl/fix_utf8_decoding_of_token_data
Fix UnicodeDecodeError thrown when token returns undecodeable symbol
2021-08-04 23:00:07 +07:00
medvedev1088
d2b84bd643 Fix UnicodeDecodeError thrown when token returns undecodeable symbol or name 2021-08-04 22:48:30 +07:00
medvedev1088
1a212405ed Bump version 2021-08-02 22:39:14 +07:00
medvedev1088
a808330950 Fix receipt_effective_gas_price in streaming 2021-08-02 22:37:58 +07:00
medvedev1088
9ff51f993c Bump version 2021-08-02 18:48:56 +07:00
Evgeny Medvedev
f2f88e64c5 Merge pull request #263 from blockchain-etl/eip1559-fields
EIP-1559 fields
2021-08-02 18:10:41 +07:00
medvedev1088
7ee3497431 Fix tests 2021-08-02 16:34:14 +07:00
medvedev1088
170e7979fe Add option to convert values to strings in JSON output for extact_token_transfers and extract_tokens command. Needed for ethereum-etl-airflow 2021-08-02 16:14:37 +07:00
medvedev1088
5dd95554ef Merge branch 'develop' into eip1559-fields
# Conflicts:
#	setup.py
2021-08-01 22:19:13 +07:00
medvedev1088
45c3baffe6 Refactor 2021-07-30 00:54:58 +07:00
medvedev1088
86bb20e9d1 Fix tests 2021-07-29 23:46:53 +07:00
medvedev1088
8aa076bfb7 Bump version 2021-07-29 23:17:45 +07:00
medvedev1088
d9378e7d17 Add bytes32 support for symbol and name in ERC20 tokens 2021-07-29 23:17:01 +07:00
medvedev1088
55332cde00 Bump python-dateutil version 2021-07-29 22:35:53 +07:00
medvedev1088
eaf6a8f9b6 Bump version 2021-07-25 16:04:59 +07:00
Evgeny Medvedev
040849c66b Merge pull request #261 from blockchain-etl/fix_dependencies
Fix dependencies
2021-07-25 16:03:47 +07:00
medvedev1088
c2a878e175 Update link to travis ci 2021-07-25 14:36:13 +07:00
medvedev1088
083cbd6891 Trigger build 2021-07-25 14:32:36 +07:00
medvedev1088
c7ffffa5a8 Fix eth-utils version, bump click to 7.1.2 2021-07-25 14:20:54 +07:00
medvedev1088
240982bac1 Fix slow tests 2021-07-25 14:20:28 +07:00
Evgeny Medvedev
53fa461001 Merge pull request #256 from ninjascant/feature/eip1559-fields
EIP1559 fields
2021-07-19 19:37:59 +07:00
Maxim Razhev
efeeb297df Add missing transaction_type field to streaming tx enrichment 2021-07-05 01:12:31 +05:00
Maxim Razhev
1e00335b71 Add new fields to streamer tx enrichment 2021-07-01 17:00:17 +05:00
Maxim Razhev
e70698e8b5 Fix tx fee per gas fields type 2021-07-01 12:59:30 +05:00
Maxim Razhev
5f41b1ef15 Add effective_gas_price field to receipts 2021-07-01 12:58:55 +05:00
Maxim Razhev
926c0afad1 Fix schema 2021-07-01 11:28:34 +05:00
Maxim Razhev
47049e0697 Fix field names 2021-07-01 10:56:51 +05:00
Maxim Razhev
1bacd89423 Fix field name case 2021-06-30 16:17:31 +05:00
Maxim Razhev
686107b313 Fix new field export/ 2021-06-28 17:03:23 +05:00
Maxim Razhev
4dba6a1e8c Add baseFeePerGas for block export 2021-06-24 17:44:58 +05:00
medvedev1088
ecc4484034 Enable message ordering if topic name contains sorted 2021-06-22 18:58:26 +07:00
medvedev1088
b568101c9c Bump version 2021-06-08 16:53:14 +07:00
Evgeny Medvedev
d25bd078f3 Merge pull request #251 from blockchain-etl/fix_typo_postgres_exporter
Fix typo in item_exporter_creator.py
2021-06-08 16:52:08 +07:00
medvedev1088
cb5dcac8c0 Fix typo in item_exporter_creator.py 2021-06-08 16:49:52 +07:00
medvedev1088
e79c32e422 Merge remote-tracking branch 'origin/develop' into develop 2021-06-04 19:24:44 +07:00
medvedev1088
479d8ece72 Add link to Public datasets in BigQuery 2021-06-04 19:24:36 +07:00
Evgeny Medvedev
b4a385e915 Merge pull request #249 from a6b8/develop
Update README.md
2021-05-29 02:33:56 +07:00
Andreas Banholzer
0e11db80f0 Update README.md 2021-05-28 19:53:10 +02:00
medvedev1088
dbb7248206 Simplify README 2021-05-15 21:40:42 +07:00
medvedev1088
de2a9ed5aa Update link in readme 2021-04-28 22:02:48 +07:00
medvedev1088
b3fab3c089 Bump version 2021-03-05 22:07:51 +07:00
Evgeny Medvedev
895bf818a2 Merge pull request #238 from blockchain-etl/erc20_functions_coverage
Add NAME, SYMBOL, DECIMALS to erc20_abi.py and eth_token_service.py
2021-03-05 22:05:59 +07:00
medvedev1088
d83fcd4307 Add NAME, SYMBOL, DECIMALS to erc20_abi.py and eth_token_service.py 2021-03-05 21:50:30 +07:00
Evgeny Medvedev
d7283ba301 Merge pull request #233 from blockchain-etl/bug/converters_module_broken
Fix broken converters module
2021-01-09 19:55:39 +07:00
medvedev1088
111633874a Bump version 2021-01-09 19:39:04 +07:00
medvedev1088
3c6291a873 Fix missing converters module 2021-01-09 19:38:16 +07:00
medvedev1088
48f11fc9e1 Add export to GCS and message ordering in pubsub 2021-01-09 19:34:53 +07:00
medvedev1088
511b60ecfa Enable message ordering for pubsub exporter 2020-12-06 19:42:29 +07:00
medvedev1088
fcf576f6bc Add link to Ethereum 2.0 ETL to README 2020-10-26 19:13:03 +07:00
medvedev1088
15b0f683b9 Add Programming Language Python 3.8 to setup.py 2020-10-09 17:17:42 +07:00
medvedev1088
742e78b7f7 Bump version 2020-10-09 17:14:57 +07:00
Evgeny Medvedev
68f6bec10b Merge pull request #225 from blockchain-etl/feature/python38
Add py38 in setup.py and tests
2020-10-09 17:13:31 +07:00
medvedev1088
04b179aadf Add py38 setup.py and tests 2020-10-09 17:02:00 +07:00
medvedev1088
8d159a58c0 Update docs 2020-10-03 18:27:13 +07:00
medvedev1088
10087aecbb Remove latest tag from dockerhub workflow 2020-08-21 20:42:25 +07:00
medvedev1088
e340074ce6 Bump the version 2020-08-21 20:05:04 +07:00
Evgeny Medvedev
a74f53f351 Merge pull request #222 from blockchain-etl/bug/tokens_param_recognizes_single_value
Fix --tokens in export_token_transfers.py recognizes only 1 parameter
2020-08-21 20:03:08 +07:00
medvedev1088
e61248e798 Fix --tokens in export_token_transfers.py recognizes only 1 parameter 2020-08-21 19:43:19 +07:00
medvedev1088
e78a856438 Update Infura id 2020-08-14 19:35:39 +07:00
medvedev1088
40b98215b6 Update citing.md 2020-07-22 15:01:57 +07:00
medvedev1088
c19bdf053f Fix extra comma in citing.md 2020-07-22 00:10:13 +07:00
medvedev1088
8ccb6dfe77 Merge remote-tracking branch 'origin/develop' into develop 2020-07-22 00:08:40 +07:00
medvedev1088
4ce02de2e0 Add Citing section to the docs 2020-07-22 00:08:28 +07:00
Evgeny Medvedev
56d232781a Merge pull request #214 from franckc/originprotocol
Add support for extracting Origin Protocol data
2020-06-14 19:54:21 +07:00
Franck Chastagnol
c5a67b0fd4 Use null rather than empty string as default for shop product fields 2020-06-08 09:39:20 -07:00
Franck Chastagnol
2498bf5560 Fix unit tests 2020-06-07 23:21:07 -07:00
Franck Chastagnol
4c0a06fc36 Minor fixes 2020-06-07 22:45:31 -07:00
Franck Chastagnol
101f0dbd67 Add dependency on requests package 2020-06-07 22:09:32 -07:00
Franck Chastagnol
bc40a13ec6 Merge branch 'develop' into originprotocol 2020-06-07 22:01:42 -07:00
Franck Chastagnol
1bca49b31f Clean up, Add unit tests 2020-06-07 21:55:07 -07:00
Evgeny Medvedev
8df8407137 Merge pull request #217 from blockchain-etl/fix/update-nansen-url
Updated Nansen link in docs
2020-05-25 23:40:51 +07:00
askeluv
4958c1e264 Updated Nansen link in docs 2020-05-25 18:00:01 +02:00
medvedev1088
60f5340754 Add a link to Ivan on Tech video 2020-05-23 16:59:00 +07:00
Franck Chastagnol
c84a6d1195 Extract origin protocol data 2020-05-15 19:11:52 -07:00
Evgeny Medvedev
04bc4a888b Merge pull request #212 from blockchain-etl/fix/update-project-links
Updated Nansen link + added projects to README.md
2020-05-05 21:01:24 +07:00
askeluv
84886c7f48 Updated Nansen link + added projects to README.md 2020-05-05 15:44:44 +02:00
Evgeny Medvedev
c1e5691d1d Merge pull request #210 from blockchain-etl/feature/publish_to_dockerhub_workflow
Add publish-to-dockerhub.yml
2020-04-21 21:55:43 +07:00
medvedev1088
16dfcb24ed Fix organization name in publish-to-dockerhub.yml 2020-04-16 23:58:41 +07:00
medvedev1088
8164ee105d Add publish-to-dockerhub.yml 2020-04-16 23:49:12 +07:00
medvedev1088
ac866f6459 Update README 2020-04-16 23:25:30 +07:00
medvedev1088
90c4982a6b Add Infura project id to commands in docs 2020-04-16 23:23:17 +07:00
medvedev1088
ae131baa0e Update docs 2020-04-16 23:09:32 +07:00
164 changed files with 7871 additions and 556 deletions

4
.github/FUNDING.yml vendored
View File

@@ -1,4 +0,0 @@
# These are supported funding model platforms
custom: https://gitcoin.co/grants/233/ethereumetl

View File

@@ -0,0 +1,20 @@
name: Publish DockerHub
on:
push:
tags:
- '*'
jobs:
build:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@master
- name: Publish to DockerHub
if: startsWith(github.event.ref, 'refs/tags/v')
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: blockchainetl/ethereum-etl
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
tag_semver: true

View File

@@ -8,7 +8,7 @@ on:
jobs:
build-n-publish:
name: Build and publish to PyPI and TestPyPI
runs-on: ubuntu-18.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@master
- name: Set up Python 3.7

3
.gitignore vendored
View File

@@ -47,3 +47,6 @@ coverage.xml
.venv
venv/
ENV/
# etl
/last_synced_block.txt

14
.readthedocs.yaml Normal file
View File

@@ -0,0 +1,14 @@
# Read the Docs configuration file for MkDocs projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.12"
mkdocs:
configuration: mkdocs.yml

View File

@@ -2,13 +2,13 @@ language: python
dist: xenial
matrix:
include:
- python: "3.5"
env: TOX_POSARGS="-e py35"
- python: "3.6"
env: TOX_POSARGS="-e py36"
- python: "3.7"
- python: "3.7.2"
env: TOX_POSARGS="-e py37"
- python: "3.8"
env: TOX_POSARGS="-e py38"
- python: "3.9"
env: TOX_POSARGS="-e py39"
install:
- travis_retry pip install tox
script:
- tox $TOX_POSARGS
- travis_wait tox $TOX_POSARGS

View File

@@ -1,4 +1,4 @@
FROM python:3.6
FROM python:3.7
MAINTAINER Evgeny Medvedev <evge.medvedev@gmail.com>
ENV PROJECT_DIR=ethereum-etl

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
Copyright (c) 2018-2025 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

View File

@@ -1,9 +1,9 @@
# Ethereum ETL
[![Build Status](https://travis-ci.org/blockchain-etl/ethereum-etl.png)](https://travis-ci.org/blockchain-etl/ethereum-etl)
[![Join the chat at https://gitter.im/ethereum-eth](https://badges.gitter.im/ethereum-etl.svg)](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Telegram](https://img.shields.io/badge/telegram-join%20chat-blue.svg)](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
[![Discord](https://img.shields.io/badge/discord-join%20chat-blue.svg)](https://discord.gg/wukrezR)
[![Build Status](https://app.travis-ci.com/blockchain-etl/ethereum-etl.svg?branch=develop)](https://travis-ci.com/github/blockchain-etl/ethereum-etl)
[![License](https://img.shields.io/github/license/blockchain-etl/ethereum-etl)](https://github.com/blockchain-etl/ethereum-etl/blob/develop/LICENSE)
[![Telegram](https://img.shields.io/badge/telegram-join%20chat-blue.svg)](https://t.me/BlockchainETL)
[![Twitter](https://img.shields.io/twitter/follow/EthereumETL)](https://x.com/EthereumETL)
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
@@ -23,10 +23,11 @@ Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
--blocks-output blocks.csv --transactions-output transactions.csv \
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
```
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md##export_token_transfers)):
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md#export_token_transfers)):
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
@@ -46,7 +47,8 @@ Stream blocks, transactions, logs, token_transfers continually to console ([Refe
```bash
> pip3 install ethereum-etl[streaming]
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt \
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
```
Find other commands [here](https://ethereum-etl.readthedocs.io/en/latest/commands/).
@@ -62,20 +64,23 @@ For the latest version, check out the repo and call
- [Schema](https://ethereum-etl.readthedocs.io/en/latest/schema/)
- [Command Reference](https://ethereum-etl.readthedocs.io/en/latest/commands/)
- [Documentation](https://ethereum-etl.readthedocs.io/)
- [Public Datasets in BigQuery](https://github.com/blockchain-etl/public-datasets)
- [Exporting the Blockchain](https://ethereum-etl.readthedocs.io/en/latest/exporting-the-blockchain/)
- [Querying in Amazon Athena](https://ethereum-etl.readthedocs.io/en/latest/amazon-athena/)
- [Querying in Google BigQuery](https://ethereum-etl.readthedocs.io/en/latest/google-bigquery/)
- [Querying in Kaggle](https://www.kaggle.com/bigquery/ethereum-blockchain)
- [Airflow DAGs](https://github.com/blockchain-etl/ethereum-etl-airflow)
- [Postgres ETL](https://github.com/blockchain-etl/ethereum-etl-postgresql)
- [Ethereum 2.0 ETL](https://github.com/blockchain-etl/ethereum2-etl)
## Running Tests
```bash
> pip3 install -e .[dev,streaming]
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
> export PROVIDER_URL=<your_provider_uri>
> pytest -vv
```
```
### Running Tox Tests
@@ -86,7 +91,7 @@ For the latest version, check out the repo and call
## Running in Docker
1. Install Docker https://docs.docker.com/install/
1. Install Docker: https://docs.docker.com/get-docker/
2. Build a docker image
@@ -100,8 +105,19 @@ For the latest version, check out the repo and call
4. Run streaming to console or Pub/Sub
> docker build -t ethereum-etl:latest -f Dockerfile .
> docker build -t ethereum-etl:latest .
> echo "Stream to console"
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
> echo "Stream to Pub/Sub"
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your_project>/topics/crypto_ethereum
If running on an Apple M1 chip add the `--platform linux/x86_64` option to the `build` and `run` commands e.g.:
```
docker build --platform linux/x86_64 -t ethereum-etl:latest .
docker run --platform linux/x86_64 ethereum-etl:latest stream --start-block 500000
```
## Projects using Ethereum ETL
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum

View File

@@ -45,7 +45,7 @@ class BaseItemExporter(object):
self._configure(kwargs)
def _configure(self, options, dont_fail=False):
"""Configure the exporter by poping options from the ``options`` dict.
"""Configure the exporter by popping options from the ``options`` dict.
If dont_fail is set, it won't raise an exception on unexpected options
(useful for using with keyword arguments in subclasses constructors)
"""
@@ -119,9 +119,16 @@ class CsvItemExporter(BaseItemExporter):
return serializer(value)
def _join_if_needed(self, value):
def to_string(x):
if isinstance(x, dict):
# Separators without whitespace for compact format.
return JSONEncoder(separators=(',', ':')).encode(x)
else:
return str(x)
if isinstance(value, (list, tuple)):
try:
return self._join_multivalued.join(str(x) for x in value)
return self._join_multivalued.join(to_string(x) for x in value)
except TypeError: # list in value may not contain strings
pass
return value

View File

@@ -24,10 +24,11 @@ import logging
from blockchainetl.atomic_counter import AtomicCounter
from blockchainetl.exporters import CsvItemExporter, JsonLinesItemExporter
from blockchainetl.file_utils import get_file_handle, close_silently
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
class CompositeItemExporter:
def __init__(self, filename_mapping, field_mapping=None):
def __init__(self, filename_mapping, field_mapping=None, converters=()):
self.filename_mapping = filename_mapping
self.field_mapping = field_mapping or {}
@@ -35,6 +36,8 @@ class CompositeItemExporter:
self.exporter_mapping = {}
self.counter_mapping = {}
self.converter = CompositeItemConverter(converters)
self.logger = logging.getLogger('CompositeItemExporter')
def open(self):
@@ -62,7 +65,7 @@ class CompositeItemExporter:
exporter = self.exporter_mapping.get(item_type)
if exporter is None:
raise ValueError('Exporter for item type {} not found'.format(item_type))
exporter.export_item(item)
exporter.export_item(self.converter.convert_item(item))
counter = self.counter_mapping.get(item_type)
if counter is not None:

View File

@@ -37,6 +37,9 @@ class CompositeItemConverter:
self.converters = converters
def convert_item(self, item):
if self.converters is None:
return item
for converter in self.converters:
item = converter.convert_item(item)
return item

View File

@@ -0,0 +1,46 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# MIT License
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
class IntToStringItemConverter(SimpleItemConverter):
    """Converts integer field values to their decimal string form.

    Presumably used so downstream sinks that cannot represent large
    integers exactly receive strings instead — TODO confirm with callers.
    """

    def __init__(self, keys=None):
        # A value of None means: convert integers on every key.
        self.keys = set(keys) if keys else None

    def convert_field(self, key, value):
        # Pass through anything that is not an int on a tracked key.
        if not isinstance(value, int):
            return value
        if self.keys is not None and key not in self.keys:
            return value
        return str(value)

View File

@@ -30,11 +30,10 @@
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
class SimpleItemConverter:
def __init__(self, converters=()):
self.converters = converters
def __init__(self, field_converters=None):
self.field_converters = field_converters
def convert_item(self, item):
return {
@@ -42,4 +41,7 @@ class SimpleItemConverter:
}
def convert_field(self, key, value):
return value
if self.field_converters is not None and key in self.field_converters:
return self.field_converters[key](value)
else:
return value

View File

@@ -0,0 +1,111 @@
# MIT License
#
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import logging
from collections import defaultdict
from google.cloud import storage
def build_block_bundles(items):
    """Group a flat stream of exported items into one bundle per block.

    Each bundle is a dict with keys 'block', 'transactions', 'logs',
    'token_transfers' and 'traces'. Items whose type is not recognized
    are skipped with an info log. Bundles are returned ordered by block
    number; a block number with anything other than exactly one 'block'
    item raises ValueError.
    """
    # One bucket per recognized item type, keyed by block number.
    buckets = {
        'block': defaultdict(list),
        'transaction': defaultdict(list),
        'log': defaultdict(list),
        'token_transfer': defaultdict(list),
        'trace': defaultdict(list),
    }

    for item in items:
        item_type = item.get('type')
        if item_type not in buckets:
            logging.info(f'Skipping item with type {item_type}')
            continue
        # Blocks carry their number in 'number'; everything else in 'block_number'.
        number = item.get('number') if item_type == 'block' else item.get('block_number')
        buckets[item_type][number].append(item)

    bundles = []
    for block_number in sorted(buckets['block'].keys()):
        candidates = buckets['block'][block_number]
        if len(candidates) != 1:
            raise ValueError(
                f'There must be a single block for a given block number, '
                f'was {len(candidates)} for block number {block_number}')
        bundles.append({
            'block': candidates[0],
            'transactions': buckets['transaction'][block_number],
            'logs': buckets['log'][block_number],
            'token_transfers': buckets['token_transfer'][block_number],
            'traces': buckets['trace'][block_number],
        })
    return bundles
class GcsItemExporter:
    """Uploads one JSON file per block bundle to a Google Cloud Storage bucket.

    Objects are written to gs://<bucket>/<path>/<block_number>.json.
    """

    def __init__(
            self,
            bucket,
            path='blocks',
            build_block_bundles_func=build_block_bundles):
        self.bucket = bucket
        self.path = normalize_path(path)
        self.build_block_bundles_func = build_block_bundles_func
        self.storage_client = storage.Client()

    def open(self):
        # Nothing to acquire; kept for exporter-interface parity.
        pass

    def export_items(self, items):
        """Bundle items by block and upload each bundle as a JSON object."""
        for bundle in self.build_block_bundles_func(items):
            block = bundle.get('block')
            if block is None:
                raise ValueError('block_bundle must include the block field')
            number = block.get('number')
            if number is None:
                raise ValueError('block_bundle must include the block.number field')
            destination_blob_name = f'{self.path}/{number}.json'
            blob = self.storage_client.bucket(self.bucket).blob(destination_blob_name)
            blob.upload_from_string(json.dumps(bundle))
            logging.info(f'Uploaded file gs://{self.bucket}/{destination_blob_name}')

    def close(self):
        # GCS client needs no explicit teardown.
        pass
def normalize_path(p):
    """Normalize a GCS object path: None becomes '', and at most one
    leading and one trailing slash are stripped."""
    p = '' if p is None else p
    if p.startswith('/'):
        p = p[1:]
    if p.endswith('/'):
        p = p[:-1]
    return p

View File

@@ -29,9 +29,19 @@ from timeout_decorator import timeout_decorator
class GooglePubSubItemExporter:
def __init__(self, item_type_to_topic_mapping, message_attributes=('item_id', 'item_timestamp')):
def __init__(self, item_type_to_topic_mapping, message_attributes=(),
batch_max_bytes=1024 * 5, batch_max_latency=1, batch_max_messages=1000,
enable_message_ordering=False):
self.item_type_to_topic_mapping = item_type_to_topic_mapping
self.publisher = create_publisher()
self.batch_max_bytes = batch_max_bytes
self.batch_max_latency = batch_max_latency
self.batch_max_messages = batch_max_messages
self.enable_message_ordering = enable_message_ordering
self.publisher = self.create_publisher()
self.message_attributes = message_attributes
def open(self):
@@ -46,7 +56,7 @@ class GooglePubSubItemExporter:
# details = "channel is in state TRANSIENT_FAILURE"
# https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606
logging.info('Recreating Pub/Sub publisher.')
self.publisher = create_publisher()
self.publisher = self.create_publisher()
raise e
@timeout_decorator.timeout(300)
@@ -66,7 +76,8 @@ class GooglePubSubItemExporter:
topic_path = self.item_type_to_topic_mapping.get(item_type)
data = json.dumps(item).encode('utf-8')
message_future = self.publisher.publish(topic_path, data=data, **self.get_message_attributes(item))
ordering_key = 'all' if self.enable_message_ordering else ''
message_future = self.publisher.publish(topic_path, data=data, ordering_key=ordering_key, **self.get_message_attributes(item))
return message_future
else:
logging.warning('Topic for item type "{}" is not configured.'.format(item_type))
@@ -80,15 +91,15 @@ class GooglePubSubItemExporter:
return attributes
def create_publisher(self):
batch_settings = pubsub_v1.types.BatchSettings(
max_bytes=self.batch_max_bytes,
max_latency=self.batch_max_latency,
max_messages=self.batch_max_messages,
)
publisher_options = pubsub_v1.types.PublisherOptions(enable_message_ordering=self.enable_message_ordering)
return pubsub_v1.PublisherClient(batch_settings=batch_settings, publisher_options=publisher_options)
def close(self):
pass
def create_publisher():
batch_settings = pubsub_v1.types.BatchSettings(
max_bytes=1024 * 5, # 5 kilobytes
max_latency=1, # 1 second
max_messages=1000,
)
return pubsub_v1.PublisherClient(batch_settings)

View File

@@ -0,0 +1,54 @@
import collections
import json
import logging
from kafka import KafkaProducer
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
class KafkaItemExporter:
    """Publishes exported items as JSON messages to Kafka topics.

    The topic for each item is selected by its 'type' field via
    item_type_to_topic_mapping; items with an unmapped type are skipped
    with a warning.

    NOTE(review): `converters` are stored and exposed via convert_items(),
    but export_item/export_items never apply them — confirm whether
    conversion should happen before publishing.
    """

    def __init__(self, output, item_type_to_topic_mapping, converters=()):
        self.item_type_to_topic_mapping = item_type_to_topic_mapping
        self.converter = CompositeItemConverter(converters)
        self.connection_url = self.get_connection_url(output)
        # Use logging rather than print so the bootstrap address respects
        # the configured log destination.
        logging.info(self.connection_url)
        self.producer = KafkaProducer(bootstrap_servers=self.connection_url)

    def get_connection_url(self, output):
        """Extract 'host:port' from an output param like 'kafka/127.0.0.1:9092'.

        Raises Exception with a helpful message when the param is malformed.
        """
        try:
            return output.split('/')[1]
        except IndexError:
            # split() without a '/' yields a single-element list, so indexing
            # [1] raises IndexError — the original `except KeyError` could
            # never trigger and the raw IndexError leaked to the caller.
            raise Exception('Invalid kafka output param, It should be in format of "kafka/127.0.0.1:9092"')

    def open(self):
        # Producer is created in __init__; nothing further to open.
        pass

    def export_items(self, items):
        for item in items:
            self.export_item(item)

    def export_item(self, item):
        """Send one item to its mapped topic; warn and skip unmapped types."""
        item_type = item.get('type')
        if item_type is not None and item_type in self.item_type_to_topic_mapping:
            data = json.dumps(item).encode('utf-8')
            logging.debug(data)
            return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data)
        else:
            logging.warning('Topic for item type "{}" is not configured.'.format(item_type))

    def convert_items(self, items):
        # Generator applying the configured converters; currently unused
        # by the export path (see class NOTE).
        for item in items:
            yield self.converter.convert_item(item)

    def close(self):
        # KafkaProducer is left to be garbage collected, matching
        # the original behavior.
        pass
def group_by_item_type(items):
    """Bucket items by their 'type' field.

    Items without a 'type' key are grouped under None. Returns a
    defaultdict(list) so missing types read back as empty lists.
    """
    grouped = collections.defaultdict(list)
    for entry in items:
        grouped[entry.get('type')].append(entry)
    return grouped

View File

@@ -0,0 +1,82 @@
# MIT License
#
# Copyright (c) 2022 CoinStats LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import typing as t
import uuid
from itertools import zip_longest
import boto3
_KINESIS_BATCH_LIMIT = 500
def _uuid_partition_key(_: dict) -> str:
return uuid.uuid4().hex
class KinesisItemExporter:
    """Exports items to an AWS Kinesis data stream via boto3.

    Records are serialized to JSON; each record's partition key is
    produced by partition_key_callable (a random UUID hex by default).
    open() must be called before exporting.
    """

    def __init__(
        self,
        stream_name: str,
        partition_key_callable: t.Callable[[dict], str] = _uuid_partition_key,
    ):
        # The redundant in-method `import boto3` was removed: boto3 is
        # already imported at module level.
        self._stream_name = stream_name
        self._partition_key_callable = partition_key_callable
        self._kinesis_client = None  # initialized in .open

    def open(self) -> None:
        self._kinesis_client = boto3.client('kinesis')

    def export_items(self, items: t.Iterable[dict]) -> None:
        """Send items in batches of _KINESIS_BATCH_LIMIT via PutRecords."""
        # zip_longest over N copies of one iterator yields fixed-size
        # chunks; the final short chunk is padded with `sentinel`, which
        # is filtered out below.
        sentinel = object()
        chunks = zip_longest(
            *(iter(items),) * _KINESIS_BATCH_LIMIT,
            fillvalue=sentinel,
        )
        for chunk in chunks:
            self._kinesis_client.put_records(
                StreamName=self._stream_name,
                Records=[
                    {
                        'Data': _serialize_item(item),
                        'PartitionKey': self._partition_key_callable(item),
                    }
                    for item in chunk
                    if item is not sentinel
                ],
            )

    def export_item(self, item: dict) -> None:
        """Send a single item via PutRecord."""
        self._kinesis_client.put_record(
            StreamName=self._stream_name,
            Data=_serialize_item(item),
            PartitionKey=self._partition_key_callable(item),
        )

    def close(self):
        # boto3 clients require no explicit teardown.
        pass
def close(self):
pass
def _serialize_item(item: dict) -> bytes:
return json.dumps(item).encode()

View File

@@ -0,0 +1,42 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class MultiItemExporter:
    """Fans every exporter-interface call out to a list of child exporters.

    Lets one streaming pipeline write to several destinations at once;
    calls are delegated in list order.
    """

    def __init__(self, item_exporters):
        self.item_exporters = item_exporters

    def open(self):
        for delegate in self.item_exporters:
            delegate.open()

    def export_items(self, items):
        for delegate in self.item_exporters:
            delegate.export_items(items)

    def export_item(self, item):
        for delegate in self.item_exporters:
            delegate.export_item(item)

    def close(self):
        for delegate in self.item_exporters:
            delegate.close()

View File

@@ -7,3 +7,5 @@ def logging_basic_config(filename=None):
logging.basicConfig(level=logging.INFO, format=format, filename=filename)
else:
logging.basicConfig(level=logging.INFO, format=format)
logging.getLogger('ethereum_dasm.evmdasm').setLevel(logging.ERROR)

10
docs/citing.md Normal file
View File

@@ -0,0 +1,10 @@
## How to Cite
```
@misc{ethereumetl,
author = {Evgeny Medvedev and the D5 team},
title = {Ethereum ETL},
year = {2018},
url = {https://github.com/blockchain-etl/ethereum-etl}
}
```

View File

@@ -56,7 +56,7 @@ Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
--tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 --tokens 0x06012c8cf97bead5deae237070f9587f8e7a266d
--tokens 0x1F573D6Fb3F13d689FF844B4cE37794d79a7FF1C --tokens 0x80fB784B7eD66730e8b1DBd9820aFD29931aab03
```
You can tune `--batch-size`, `--max-workers` for performance.
@@ -165,7 +165,7 @@ You can tune `--batch-size`, `--max-workers` for performance.
Read [Differences between geth and parity traces.csv](schema.md#differences-between-geth-and-parity-tracescsv)
The API used in this command is not supported by Infura,
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --ipcapi debug`).
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --txlookuplimit 0`).
When using rpc, add `--rpc --rpcapi debug` options.
```bash
@@ -186,7 +186,7 @@ You can tune `--batch-size`, `--max-workers` for performance.
#### get_block_range_for_date
```bash
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io --date 2018-01-01
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --date 2018-01-01
4832686,4838611
```
@@ -201,17 +201,21 @@ You can tune `--batch-size`, `--max-workers` for performance.
```bash
> pip3 install ethereum-etl[streaming]
> ethereumetl stream --provider-uri https://mainnet.infura.io --start-block 500000
> ethereumetl stream --provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --start-block 500000
```
- This command outputs blocks, transactions, logs, token_transfers to the console by default.
- Entity types can be specified with the `-e` option,
e.g. `-e block,transaction,log,token_transfer,trace,contract,token`.
- Use `--output` option to specify the Google Pub/Sub topic or Postgres database where to publish blockchain data,
- Use `--output` option to specify the Google Pub/Sub topic, Postgres database or GCS bucket where to publish blockchain data,
- For Google PubSub: `--output=projects/<your-project>/topics/crypto_ethereum`.
Data will be pushed to `projects/<your-project>/topics/crypto_ethereum.blocks`, `projects/<your-project>/topics/crypto_ethereum.transactions` etc. topics.
- For Postgres: `--output=postgresql+pg8000://<user>:<password>@<host>:<port>/<database_name>`,
e.g. `--output=postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum`.
e.g. `--output=postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum`.
- For GCS: `--output=gs://<bucket_name>`. Make sure to install and initialize `gcloud` cli.
- For Kafka: `--output=kafka/<host>:<port>`, e.g. `--output=kafka/127.0.0.1:9092`
- Those output types can be combined with a comma e.g. `--output=gs://<bucket_name>,projects/<your-project>/topics/crypto_ethereum`
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).

View File

@@ -1,4 +1,3 @@
# Contact
- [D5 Discord Server](https://discord.gg/wukrezR)
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)

View File

@@ -1,11 +1,11 @@
# Uploading to Docker Hub
```bash
ETHEREUMETL_STREAMING_VERSION=1.5.1
docker build -t ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} -f Dockerfile .
docker tag ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} blockchainetl/ethereum-etl:${ETHEREUMETL_STREAMING_VERSION}
docker push blockchainetl/ethereum-etl:${ETHEREUMETL_STREAMING_VERSION}
ETHEREUMETL_VERSION=1.11.0
docker build -t ethereum-etl:${ETHEREUMETL_VERSION} -f Dockerfile .
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
docker push blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
docker tag ethereum-etl:${ETHEREUMETL_STREAMING_VERSION} blockchainetl/ethereum-etl:latest
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:latest
docker push blockchainetl/ethereum-etl:latest
```

View File

@@ -1,21 +1,21 @@
## Exporting the Blockchain
If you'd like to have blockchain data set up and hosted for you, [get in touch with us at D5](https://d5.ai/?ref=ethereumetl).
1. Install python 3.5.3+ https://www.python.org/downloads/
1. Install python 3.5.3+: [https://www.python.org/downloads/](https://www.python.org/downloads/)
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
For that use `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
export the data ~40 times faster, you will need to set up a local Ethereum node:
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
1. Install geth: [https://github.com/ethereum/go-ethereum/wiki/Installing-Geth](https://github.com/ethereum/go-ethereum/wiki/Installing-Geth)
1. Start geth.
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
You can export blocks below `currentBlock`,
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
and token details; for those you need to wait until the full sync).
and token details; for those you need to wait until the full sync). Note that you may need to wait for another day or
two for the node to download the states. See this issue [https://github.com/blockchain-etl/ethereum-etl/issues/265#issuecomment-970451522](https://github.com/blockchain-etl/ethereum-etl/issues/265#issuecomment-970451522).
Make sure to set `--txlookuplimit 0` if you use geth.
1. Install Ethereum ETL: `> pip3 install ethereum-etl`
1. Export all:
@@ -40,7 +40,7 @@ output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_0
Should work with geth and parity, on Linux, Mac, Windows.
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
does not place all of the block or receipt data into the database https://wiki.parity.io/Getting-Synced
does not place all of the block or receipt data into the database [https://wiki.parity.io/Getting-Synced](https://wiki.parity.io/Getting-Synced)
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
check out this issue: https://github.com/medvedev1088/ethereum-etl/issues/28

View File

@@ -1,4 +1,4 @@
# Google BiqQuery
# Google BigQuery
## Querying in BigQuery
@@ -16,4 +16,4 @@ Read [this article](https://medium.com/google-cloud/building-token-recommender-i
### Awesome BigQuery Views
https://github.com/blockchain-etl/awesome-bigquery-views
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)

View File

@@ -2,7 +2,7 @@
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
With 700+ likes on Github, Ethereum ETL is the most popular open source project for Ethereum data.
With 1,700+ likes on GitHub, Ethereum ETL is the most popular open-source project for Ethereum data.
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
@@ -17,8 +17,31 @@ Easily export:
* Receipts
* Logs
* Contracts
* Internal transactions
* Internal transactions (traces)
## Advanced Features
* Stream blockchain data to Pub/Sub, Postgres, or other destinations in real-time
* Filter and transform data using flexible command-line options
* Support for multiple Ethereum node providers (Geth, Parity, Infura, etc.)
* Handles chain reorganizations through configurable lag
* Export data by block range or by date
* Scalable architecture with configurable batch sizes and worker counts
## Use Cases
* Data analysis and visualization
* Machine learning on blockchain data
* Building analytics dashboards
* Market research and token analysis
* Compliance and audit reporting
* Academic research on blockchain economics
## Projects using Ethereum ETL
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
* [Nansen by D5](https://d5.ai/?ref=ethereumetl) - Analytics platform for Ethereum
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum
* [Ethereum Blockchain ETL on GCP](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics) - Official Google Cloud reference architecture
## Getting Started
Check the [Quickstart](quickstart.md) guide to begin using Ethereum ETL or explore the [Commands](commands.md) page for detailed usage instructions.

View File

@@ -4,7 +4,7 @@
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing in the `tokens`
table.
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
contracts are missing this data. Also some contracts (EOS) implement these methods but with wrong return type,
contracts are missing this data. Also some contracts (EOS) implement these methods but with the wrong return type,
so the metadata columns are missing in this case as well.
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
because numeric types there can't handle 32-byte integers. You should use
@@ -12,4 +12,4 @@ because numeric types there can't handle 32-byte integers. You should use
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
- The contracts that don't implement `decimals()` function but have the
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
will have `0` or `1` in the `decimals` column in the CSVs.
will have `0` or `1` in the `decimals` column in the CSVs.

View File

@@ -7,3 +7,4 @@
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
- [Unlocking the Power of Google BigQuery (Cloud Next '19)](https://youtu.be/KL_i5XZIaJg?t=131)

View File

@@ -10,7 +10,7 @@ Export blocks and transactions:
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --blocks-output blocks.csv --transactions-output transactions.csv
```
Export ERC20 and ERC721 transfers:

View File

@@ -22,6 +22,11 @@ gas_limit | bigint |
gas_used | bigint |
timestamp | bigint |
transaction_count | bigint |
base_fee_per_gas | bigint |
withdrawals_root | string |
withdrawals | string |
blob_gas_used | bigint |
excess_blob_gas | bigint |
---
@@ -41,6 +46,11 @@ gas | bigint |
gas_price | bigint |
input | hex_string |
block_timestamp | bigint |
max_fee_per_gas | bigint |
max_priority_fee_per_gas | bigint |
transaction_type | bigint |
max_fee_per_blob_gas | bigint |
blob_versioned_hashes | string |
---
@@ -71,6 +81,9 @@ gas_used | bigint |
contract_address | address |
root | hex_string |
status | bigint |
effective_gas_price | bigint |
blob_gas_price | bigint |
blob_gas_used | bigint |
---
@@ -111,6 +124,7 @@ symbol | string |
name | string |
decimals | bigint |
total_supply | numeric |
block_number | bigint |
---
@@ -139,7 +153,7 @@ trace_id | string |
### Differences between geth and parity traces.csv
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is same as `to_address` of parent call);
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is the same as `to_address` of parent call);
- geth output doesn't have `reward` traces;
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
- `error` field contains human readable error message, which might differ in geth/parity output;
@@ -150,4 +164,4 @@ trace_id | string |
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
Note: for the `address` type all hex characters are lower-cased.
`boolean` type can have 2 values: `True` or `False`.
`boolean` type can have 2 values: `True` or `False`.

View File

@@ -19,12 +19,17 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from blockchainetl.logging_utils import logging_basic_config
logging_basic_config()
import click
from ethereumetl.cli.export_all import export_all
from ethereumetl.cli.export_blocks_and_transactions import export_blocks_and_transactions
from ethereumetl.cli.export_contracts import export_contracts
from ethereumetl.cli.export_geth_traces import export_geth_traces
from ethereumetl.cli.export_origin import export_origin
from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
from ethereumetl.cli.export_token_transfers import export_token_transfers
from ethereumetl.cli.export_tokens import export_tokens
@@ -43,7 +48,7 @@ from ethereumetl.cli.stream import stream
@click.group()
@click.version_option(version='1.5.1')
@click.version_option(version='2.4.2')
@click.pass_context
def cli(ctx):
pass
@@ -52,6 +57,7 @@ def cli(ctx):
# export
cli.add_command(export_all, "export_all")
cli.add_command(export_blocks_and_transactions, "export_blocks_and_transactions")
cli.add_command(export_origin, "export_origin")
cli.add_command(export_receipts_and_logs, "export_receipts_and_logs")
cli.add_command(export_token_transfers, "export_token_transfers")
cli.add_command(extract_token_transfers, "extract_token_transfers")

View File

@@ -27,7 +27,7 @@ import re
from datetime import datetime, timedelta
from blockchainetl.logging_utils import logging_basic_config
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from ethereumetl.jobs.export_all_common import export_all_common
from ethereumetl.providers.auto import get_provider_from_uri
@@ -74,7 +74,7 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
day = timedelta(days=1)
provider = get_provider_from_uri(provider_uri)
web3 = Web3(provider)
web3 = build_web3(provider)
eth_service = EthService(web3)
while start_date <= end_date:

View File

@@ -36,7 +36,7 @@ logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-c', '--contract-addresses', required=True, type=str,
@click.option('-ca', '--contract-addresses', required=True, type=str,
help='The file containing contract addresses, one per line.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')

View File

@@ -0,0 +1,56 @@
# A job to export data from Origin Protocol.
#
# Origin Protocol is an open source platform for implementing blockchain e-commerce.
# More details at https://www.originprotocol.com
#
# The core of the platform is the marketplace smart contract:
# - Address: https://etherscan.io/address/0x698ff47b84837d3971118a369c570172ee7e54c2
# - Code: https://github.com/OriginProtocol/origin/blob/master/packages/contracts/contracts/marketplace/V01_Marketplace.sol
#
# Transactional data is stored on-chain, while side-metadata is stored in IPFS (https://ipfs.io).
#
# Given a range of block numbers, the job queries the blockchain for events emitted by the contract.
# Every event includes a hash pointing to a marketplace listing metadata stored as a JSON file on IPFS.
# A marketplace listing can either be a single self-contained listing, or the entry point for the entire
# catalog of products from a shop.
#
# The job generates 2 data sets:
# - Marketplace listings
# - Shop products.
#
import click
from ethereumetl.web3_utils import build_web3
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.jobs.export_origin_job import ExportOriginJob
from ethereumetl.jobs.exporters.origin_exporter import origin_marketplace_listing_item_exporter, origin_shop_product_item_exporter
from ethereumetl.ipfs.origin import get_origin_ipfs_client
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('--marketplace-output', default='-', show_default=True, type=str, help='The output file for marketplace data. If not specified stdout is used.')
@click.option('--shop-output', default='-', show_default=True, type=str, help='The output file for shop data. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
              help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
def export_origin(start_block, end_block, batch_size, marketplace_output, shop_output, max_workers, provider_uri):
    """Exports Origin Protocol data."""
    # Build the export job: it scans marketplace contract events in the given
    # block range and resolves listing/shop metadata from IPFS.
    # The web3 provider is wrapped in a ThreadLocalProxy so that each worker
    # thread gets its own provider instance.
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(marketplace_output),
        shop_product_exporter=origin_shop_product_item_exporter(shop_output),
        max_workers=max_workers)
    job.run()

View File

@@ -46,7 +46,7 @@ logging_basic_config()
help='The output file for receipts. If not provided receipts will not be exported. Use "-" for stdout')
@click.option('--logs-output', default=None, show_default=True, type=str,
help='The output file for receipt logs. '
'aIf not provided receipt logs will not be exported. Use "-" for stdout')
'If not provided receipt logs will not be exported. Use "-" for stdout')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
chain='ethereum'):

View File

@@ -23,8 +23,9 @@
import click
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from ethereumetl.csv_utils import set_max_field_size_limit
from ethereumetl.jobs.export_token_transfers_job import ExportTokenTransfersJob
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
from blockchainetl.logging_utils import logging_basic_config
@@ -42,14 +43,15 @@ logging_basic_config()
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
@click.option('-t', '--tokens', default=None, show_default=True, type=str, nargs=1, help='The list of token addresses to filter by.')
@click.option('-t', '--tokens', default=None, show_default=True, type=str, multiple=True, help='The list of token addresses to filter by.')
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
"""Exports ERC20/ERC721 transfers."""
set_max_field_size_limit()
job = ExportTokenTransfersJob(
start_block=start_block,
end_block=end_block,
batch_size=batch_size,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
item_exporter=token_transfers_item_exporter(output),
max_workers=max_workers,
tokens=tokens)

View File

@@ -23,7 +23,7 @@
import click
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
@@ -51,7 +51,7 @@ def export_tokens(token_addresses, output, max_workers, provider_uri, chain='eth
with smart_open(token_addresses, 'r') as token_addresses_file:
job = ExportTokensJob(
token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
item_exporter=tokens_item_exporter(output),
max_workers=max_workers)

View File

@@ -23,7 +23,7 @@
import click
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from ethereumetl.jobs.export_traces_job import ExportTracesJob
from blockchainetl.logging_utils import logging_basic_config
@@ -57,7 +57,7 @@ def export_traces(start_block, end_block, batch_size, output, max_workers, provi
start_block=start_block,
end_block=end_block,
batch_size=batch_size,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri, timeout=timeout))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri, timeout=timeout))),
item_exporter=traces_item_exporter(output),
max_workers=max_workers,
include_genesis_traces=genesis_traces,

View File

@@ -25,30 +25,35 @@ import click
import csv
import json
from ethereumetl.csv_utils import set_max_field_size_limit
from blockchainetl.file_utils import smart_open
from blockchainetl.jobs.exporters.converters.int_to_string_item_converter import IntToStringItemConverter
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
from ethereumetl.jobs.extract_token_transfers_job import ExtractTokenTransfersJob
from blockchainetl.logging_utils import logging_basic_config
logging_basic_config()
set_max_field_size_limit()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-l', '--logs', type=str, required=True, help='The CSV file containing receipt logs.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_token_transfers(logs, batch_size, output, max_workers):
@click.option('--values-as-strings', default=False, show_default=True, is_flag=True, help='Whether to convert values to strings.')
def extract_token_transfers(logs, batch_size, output, max_workers, values_as_strings=False):
"""Extracts ERC20/ERC721 transfers from logs file."""
with smart_open(logs, 'r') as logs_file:
if logs.endswith('.json'):
logs_reader = (json.loads(line) for line in logs_file)
else:
logs_reader = csv.DictReader(logs_file)
converters = [IntToStringItemConverter(keys=['value'])] if values_as_strings else []
job = ExtractTokenTransfersJob(
logs_iterable=logs_reader,
batch_size=batch_size,
max_workers=max_workers,
item_exporter=token_transfers_item_exporter(output))
item_exporter=token_transfers_item_exporter(output, converters=converters))
job.run()

View File

@@ -27,12 +27,13 @@ import json
import click
from blockchainetl.csv_utils import set_max_field_size_limit
from blockchainetl.file_utils import smart_open
from blockchainetl.jobs.exporters.converters.int_to_string_item_converter import IntToStringItemConverter
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
from ethereumetl.jobs.extract_tokens_job import ExtractTokensJob
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from web3 import Web3
from ethereumetl.web3_utils import build_web3
logging_basic_config()
@@ -44,7 +45,8 @@ logging_basic_config()
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_tokens(contracts, provider_uri, output, max_workers):
@click.option('--values-as-strings', default=False, show_default=True, is_flag=True, help='Whether to convert values to strings.')
def extract_tokens(contracts, provider_uri, output, max_workers, values_as_strings=False):
"""Extracts tokens from contracts file."""
set_max_field_size_limit()
@@ -54,10 +56,11 @@ def extract_tokens(contracts, provider_uri, output, max_workers):
contracts_iterable = (json.loads(line) for line in contracts_file)
else:
contracts_iterable = csv.DictReader(contracts_file)
converters = [IntToStringItemConverter(keys=['decimals', 'total_supply'])] if values_as_strings else []
job = ExtractTokensJob(
contracts_iterable=contracts_iterable,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
max_workers=max_workers,
item_exporter=tokens_item_exporter(output))
item_exporter=tokens_item_exporter(output, converters))
job.run()

View File

@@ -24,7 +24,7 @@
import click
from datetime import datetime
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from blockchainetl.file_utils import smart_open
from blockchainetl.logging_utils import logging_basic_config
@@ -47,7 +47,7 @@ def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
"""Outputs start and end blocks for given date."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
provider = get_provider_from_uri(provider_uri)
web3 = Web3(provider)
web3 = build_web3(provider)
eth_service = EthService(web3)
start_block, end_block = eth_service.get_block_range_for_date(date)

View File

@@ -23,7 +23,7 @@
import click
from web3 import Web3
from ethereumetl.web3_utils import build_web3
from blockchainetl.file_utils import smart_open
from blockchainetl.logging_utils import logging_basic_config
@@ -46,7 +46,7 @@ def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp,
"""Outputs start and end blocks for given timestamps."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
provider = get_provider_from_uri(provider_uri)
web3 = Web3(provider)
web3 = build_web3(provider)
eth_service = EthService(web3)
start_block, end_block = eth_service.get_block_range_for_timestamps(start_timestamp, end_timestamp)

View File

@@ -27,6 +27,7 @@ from blockchainetl.streaming.streaming_utils import configure_signals, configure
from ethereumetl.enumeration.entity_type import EntityType
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.streaming.item_exporter_creator import create_item_exporters
from ethereumetl.thread_local_proxy import ThreadLocalProxy
@@ -38,7 +39,10 @@ from ethereumetl.thread_local_proxy import ThreadLocalProxy
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output', type=str,
help='Either Google PubSub topic path e.g. projects/your-project/topics/crypto_ethereum; '
'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum. '
'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum; '
'or GCS bucket e.g. gs://your-bucket-name; '
'or kafka, output name and connection host:port e.g. kafka/127.0.0.1:9092 '
'or Kinesis, e.g. kinesis://your-data-stream-name'
'If not specified will print to console')
@click.option('-s', '--start-block', default=None, show_default=True, type=int, help='Start block')
@click.option('-e', '--entity-types', default=','.join(EntityType.ALL_FOR_INFURA), show_default=True, type=str,
@@ -55,9 +59,7 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, entit
configure_logging(log_file)
configure_signals()
entity_types = parse_entity_types(entity_types)
validate_entity_types(entity_types, output)
from ethereumetl.streaming.item_exporter_creator import create_item_exporter
from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
from blockchainetl.streaming.streamer import Streamer
@@ -67,7 +69,7 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, entit
streamer_adapter = EthStreamerAdapter(
batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
item_exporter=create_item_exporter(output),
item_exporter=create_item_exporters(output),
batch_size=batch_size,
max_workers=max_workers,
entity_types=entity_types
@@ -97,14 +99,6 @@ def parse_entity_types(entity_types):
return entity_types
def validate_entity_types(entity_types, output):
from ethereumetl.streaming.item_exporter_creator import determine_item_exporter_type, ItemExporterType
item_exporter_type = determine_item_exporter_type(output)
if item_exporter_type == ItemExporterType.POSTGRES \
and (EntityType.CONTRACT in entity_types or EntityType.TOKEN in entity_types):
raise ValueError('contract and token are not yet supported entity types for postgres item exporter.')
def pick_random_provider_uri(provider_uri):
provider_uris = [uri.strip() for uri in provider_uri.split(',')]
return random.choice(provider_uris)

View File

@@ -40,6 +40,12 @@ class EthBlock(object):
self.gas_limit = None
self.gas_used = None
self.timestamp = None
self.withdrawals_root = None
self.transactions = []
self.transaction_count = 0
self.base_fee_per_gas = 0
self.withdrawals = []
self.blob_gas_used = None
self.excess_blob_gas = None

View File

@@ -0,0 +1,32 @@
class OriginMarketplaceListing(object):
    """Flat record describing a single Origin Protocol marketplace listing.

    Every field starts out as None and is populated from the on-chain event
    and the listing metadata stored on IPFS.
    """

    # Names of the data attributes carried by every instance.
    _FIELDS = (
        'listing_id', 'ipfs_hash', 'listing_type', 'category', 'subcategory',
        'language', 'title', 'description', 'price', 'currency',
        'block_number', 'log_index',
    )

    def __init__(self):
        # Initialize every attribute to None, matching a plain record type.
        for field_name in self._FIELDS:
            setattr(self, field_name, None)
class OriginShopProduct(object):
    """Flat record describing one product (or product variant) of an Origin
    Protocol shop.

    Every field starts out as None and is populated from the shop data files
    fetched from IPFS.
    """

    # Names of the data attributes carried by every instance.
    _FIELDS = (
        'listing_id', 'product_id', 'ipfs_path', 'external_id',
        'parent_external_id', 'title', 'description', 'price', 'currency',
        'image', 'option1', 'option2', 'option3',
        'block_number', 'log_index',
    )

    def __init__(self):
        # Initialize every attribute to None, matching a plain record type.
        for field_name in self._FIELDS:
            setattr(self, field_name, None)

View File

@@ -33,3 +33,10 @@ class EthReceipt(object):
self.logs = []
self.root = None
self.status = None
self.effective_gas_price = None
self.l1_fee = None
self.l1_gas_used = None
self.l1_gas_price = None
self.l1_fee_scalar = None
self.blob_gas_price = None
self.blob_gas_used = None

View File

@@ -41,3 +41,4 @@ class EthTrace(object):
self.error = None
self.status = None
self.trace_id = None
self.trace_index = None

View File

@@ -34,3 +34,8 @@ class EthTransaction(object):
self.gas = None
self.gas_price = None
self.input = None
self.max_fee_per_gas = None
self.max_priority_fee_per_gas = None
self.transaction_type = None
self.max_fee_per_blob_gas = None
self.blob_versioned_hashes = []

View File

@@ -239,6 +239,109 @@ ERC20_ABI = json.loads('''
],
"name": "Approval",
"type": "event"
},
{
"constant": true,
"inputs": [],
"name": "NAME",
"outputs": [
{
"name": "",
"type": "string"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
},
{
"constant": true,
"inputs": [],
"name": "SYMBOL",
"outputs": [
{
"name": "",
"type": "string"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
},
{
"constant": true,
"inputs": [],
"name": "DECIMALS",
"outputs": [
{
"name": "",
"type": "uint8"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
}
]
''')
ERC20_ABI_ALTERNATIVE_1 = json.loads('''
[
{
"constant": true,
"inputs": [],
"name": "symbol",
"outputs": [
{
"name": "",
"type": "bytes32"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
},
{
"constant": true,
"inputs": [],
"name": "SYMBOL",
"outputs": [
{
"name": "",
"type": "bytes32"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
},
{
"constant": true,
"inputs": [],
"name": "name",
"outputs": [
{
"name": "",
"type": "bytes32"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
},
{
"constant": true,
"inputs": [],
"name": "NAME",
"outputs": [
{
"name": "",
"type": "bytes32"
}
],
"payable": false,
"stateMutability": "view",
"type": "function"
}
]
''')

View File

@@ -24,7 +24,7 @@ import logging
import time
from requests.exceptions import Timeout as RequestsTimeout, HTTPError, TooManyRedirects
from web3.utils.threads import Timeout as Web3Timeout
from web3._utils.threads import Timeout as Web3Timeout
from ethereumetl.executors.bounded_executor import BoundedExecutor
from ethereumetl.executors.fail_safe_executor import FailSafeExecutor

View File

@@ -44,7 +44,7 @@ class BaseItemExporter(object):
self._configure(kwargs)
def _configure(self, options, dont_fail=False):
"""Configure the exporter by poping options from the ``options`` dict.
"""Configure the exporter by popping options from the ``options`` dict.
If dont_fail is set, it won't raise an exception on unexpected options
(useful for using with keyword arguments in subclasses constructors)
"""

View File

View File

@@ -0,0 +1,31 @@
import logging
import requests
logger = logging.getLogger('ipfs')
IPFS_TIMEOUT = 5  # Timeout in seconds
IPFS_NUM_ATTEMPTS = 3
# A simple client to fetch content from IPFS gateways.
class IpfsClient:
    """Fetches content from IPFS over a list of HTTP gateways.

    Each request is retried up to IPFS_NUM_ATTEMPTS times, rotating
    round-robin through the configured gateways so that a single bad
    gateway does not cause every retry to fail.
    """

    def __init__(self, gatewayUrls):
        # Gateway base URLs, e.g. ['https://gateway.ipfs.io/ipfs'].
        # Assumes the list is non-empty — TODO confirm with callers.
        self._gateway_urls = gatewayUrls

    def _get(self, path, as_json):
        """Download ``path`` from one of the gateways.

        :param path: IPFS hash (optionally followed by a sub-path) to fetch.
        :param as_json: when True, parse the response body as JSON;
            otherwise return the raw text.
        :raises Exception: when all attempts fail; the last underlying
            error is attached as the cause.
        """
        last_error = None
        for attempt in range(IPFS_NUM_ATTEMPTS):
            # Round-robin through the gateways.
            gateway_url = self._gateway_urls[attempt % len(self._gateway_urls)]
            url = "{}/{}".format(gateway_url, path)
            try:
                response = requests.get(url, timeout=IPFS_TIMEOUT)
                response.raise_for_status()
                return response.json() if as_json else response.text
            except Exception as e:
                # Keep the last error so the final failure carries context.
                last_error = e
                logger.error("Attempt #{} - Failed downloading {}: {}".format(attempt + 1, path, e))
        raise Exception("IPFS download failure for path {}".format(path)) from last_error

    def get(self, path):
        """Return the content at ``path`` as text."""
        return self._get(path, False)

    def get_json(self, path):
        """Return the content at ``path`` parsed as JSON."""
        return self._get(path, True)

139
ethereumetl/ipfs/origin.py Normal file
View File

@@ -0,0 +1,139 @@
import logging
import re
from ethereumetl.domain.origin import OriginMarketplaceListing, OriginShopProduct
from ethereumetl.ipfs.client import IpfsClient
logger = logging.getLogger('origin')
IPFS_PRIMARY_GATEWAY_URL = 'https://cf-ipfs.com/ipfs'
IPFS_SECONDARY_GATEWAY_URL = 'https://gateway.ipfs.io/ipfs'
# Returns an IPFS client that can be used to fetch Origin Protocol's data.
def get_origin_ipfs_client():
    # The primary gateway is tried first; the secondary serves as a fallback
    # via the client's round-robin retry logic.
    return IpfsClient([IPFS_PRIMARY_GATEWAY_URL, IPFS_SECONDARY_GATEWAY_URL])
# Parses the shop's HTML index page to extract the name of the IPFS directory under
# which all the shop data is located.
def _get_shop_data_dir(shop_index_page):
match = re.search('<link rel="data-dir" href="(.+?)"', shop_index_page)
return match.group(1) if match else None
# Returns the list of products from an Origin Protocol shop.
def _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash):
    """Download and flatten all products (and their variants) of a shop.

    :param receipt_log: receipt log that referenced the shop; provides
        block_number and log_index for provenance on every row.
    :param listing_id: marketplace listing id the shop belongs to.
    :param ipfs_client: IpfsClient used to fetch the shop files.
    :param shop_ipfs_hash: IPFS hash of the shop's root directory.
    :return: list of OriginShopProduct objects; empty when the shop's
        product catalog cannot be downloaded (best-effort behavior).
    """
    results = []

    # The shop index page may declare a dedicated data directory; if so, all
    # data files live under it rather than directly under the root hash.
    shop_index_page = ipfs_client.get(shop_ipfs_hash + "/index.html")
    shop_data_dir = _get_shop_data_dir(shop_index_page)
    path = "{}/{}".format(shop_ipfs_hash, shop_data_dir) if shop_data_dir else shop_ipfs_hash
    logger.debug("Using shop path {}".format(path))

    products_path = "{}/{}".format(path, 'products.json')
    try:
        products = ipfs_client.get_json(products_path)
    except Exception as e:
        # Best-effort: a shop with an unreadable catalog yields no products.
        logger.error("Listing {} Failed downloading product {}: {}".format(listing_id, products_path, e))
        return results

    logger.info("Found {} products for listing {}".format(len(products), listing_id))

    # Go through all the products from the shop.
    for product in products:
        product_id = product.get('id')
        if not product_id:
            logger.error('Product entry with missing id in products.json')
            continue

        logger.info("Processing product {}".format(product_id))

        # Fetch the product details to get the variants. Note: this rebinds
        # ``product`` to the detailed data.json payload for this product.
        product_base_path = "{}/{}".format(path, product_id)
        product_data_path = "{}/{}".format(product_base_path, 'data.json')
        try:
            product = ipfs_client.get_json(product_data_path)
        except Exception as e:
            logger.error("Failed downloading {}: {}".format(product_data_path, e))
            continue

        # Extract the top product.
        result = OriginShopProduct()
        result.block_number = receipt_log.block_number
        result.log_index = receipt_log.log_index
        result.listing_id = listing_id
        result.product_id = "{}-{}".format(listing_id, product_id)
        result.ipfs_path = product_base_path
        result.external_id = str(product.get('externalId')) if product.get('externalId') else None
        result.parent_external_id = None
        result.title = product.get('title')
        result.description = product.get('description')
        result.price = product.get('price')
        result.currency = product.get('currency', 'fiat-USD')
        result.option1 = None
        result.option2 = None
        result.option3 = None
        result.image = product.get('image')
        results.append(result)

        # Extract the variants, if any. Variants inherit description and
        # currency from the parent product but carry their own price/options.
        variants = product.get('variants', [])
        if len(variants) > 0:
            logger.info("Found {} variants".format(len(variants)))
            for variant in variants:
                result = OriginShopProduct()
                result.block_number = receipt_log.block_number
                result.log_index = receipt_log.log_index
                result.listing_id = listing_id
                result.product_id = "{}-{}".format(listing_id, variant.get('id'))
                result.ipfs_path = product_base_path
                result.external_id = str(variant.get('externalId')) if variant.get('externalId') else None
                result.parent_external_id = str(product.get('externalId')) if product.get('externalId') else None
                result.title = variant.get('title')
                result.description = product.get('description')
                result.price = variant.get('price')
                result.currency = product.get('currency', 'fiat-USD')
                result.option1 = variant.get('option1')
                result.option2 = variant.get('option2')
                result.option3 = variant.get('option3')
                result.image = variant.get('image')
                results.append(result)

    return results
# Returns a listing from the Origin Protocol marketplace.
def get_origin_marketplace_data(receipt_log, listing_id, ipfs_client, ipfs_hash):
    """Build an OriginMarketplaceListing (plus any shop products) for a listing.

    Returns a (listing, shop_products) tuple; (None, []) when the listing
    metadata cannot be downloaded from IPFS.
    """
    # Load the listing's metadata from IPFS.
    try:
        metadata = ipfs_client.get_json(ipfs_hash)
    except Exception as e:
        logger.error("Extraction failed. Listing {} Listing hash {} - {}".format(listing_id, ipfs_hash, e))
        return None, []

    price_info = metadata.get('price', {})

    # Fill-in an OriginMarketplaceListing object based on the IPFS data.
    listing = OriginMarketplaceListing()
    listing.block_number = receipt_log.block_number
    listing.log_index = receipt_log.log_index
    listing.listing_id = str(listing_id)
    listing.ipfs_hash = ipfs_hash
    listing.listing_type = metadata.get('listingType', '')
    listing.category = metadata.get('category', '')
    listing.subcategory = metadata.get('subCategory', '')
    listing.language = metadata.get('language', '')
    listing.title = metadata.get('title', '')
    listing.description = metadata.get('description', '')
    listing.price = price_info.get('amount', '')
    listing.currency = price_info.get('currency', '')

    # If it is a shop listing, also extract all of the shop data.
    shop_products = []
    shop_ipfs_hash = metadata.get('shopIpfsHash')
    if shop_ipfs_hash:
        try:
            shop_products = _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash)
        except Exception as e:
            logger.error("Extraction failed. Listing {} Shop hash {} - {}".format(listing_id, shop_ipfs_hash, e))

    return listing, shop_products

View File

@@ -41,7 +41,7 @@ from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_trans
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from web3 import Web3
from ethereumetl.web3_utils import build_web3
logger = logging.getLogger('export_all')
@@ -146,7 +146,7 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
start_block=batch_start_block,
end_block=batch_end_block,
batch_size=batch_size,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
item_exporter=token_transfers_item_exporter(token_transfers_file),
max_workers=max_workers)
job.run()
@@ -272,7 +272,7 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
with smart_open(token_addresses_file, 'r') as token_addresses:
job = ExportTokensJob(
token_addresses_iterable=(token_address.strip() for token_address in token_addresses),
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
item_exporter=tokens_item_exporter(tokens_file),
max_workers=max_workers)
job.run()

View File

@@ -0,0 +1,132 @@
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
from blockchainetl.jobs.base_job import BaseJob
from ethereumetl.utils import validate_range
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
from ethereumetl.mappers.origin_mapper import OriginMarketplaceListingMapper, OriginShopProductMapper
from ethereumetl.service.origin_extractor import OriginEventExtractor
# Addresses of the marketplace contracts.
ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS = '0x819Bb9964B6eBF52361F1ae42CF4831B921510f9'
ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS = '0x698Ff47B84837d3971118a369c570172EE7e54c2'
# Block number at which contracts were deployed to the Mainnet.
ORIGIN_MARKETPLACE_V0_BLOCK_NUMBER_EPOCH = 6436157
ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH = 8582597
class ExportOriginJob(BaseJob):
    """Exports Origin Protocol marketplace listings and shop products.

    Scans marketplace contract logs in [start_block, end_block], extracts
    listing/product data (resolving metadata through IPFS) and writes the
    resulting items to the supplied exporters.
    """

    def __init__(
            self,
            start_block,
            end_block,
            batch_size,
            web3,
            ipfs_client,
            marketplace_listing_exporter,
            shop_product_exporter,
            max_workers):
        validate_range(start_block, end_block)
        self.start_block = start_block
        self.end_block = end_block

        self.web3 = web3
        self.marketplace_listing_exporter = marketplace_listing_exporter
        self.shop_product_exporter = shop_product_exporter
        self.batch_work_executor = BatchWorkExecutor(batch_size, max_workers)
        self.event_extractor = OriginEventExtractor(ipfs_client)

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.marketplace_listing_mapper = OriginMarketplaceListingMapper()
        self.shop_listing_mapper = OriginShopProductMapper()

        # Assume eth_newFilter is supported until the node reports otherwise,
        # then fall back to eth_getLogs for the remainder of the run.
        self._supports_eth_newFilter = True

    def _start(self):
        self.marketplace_listing_exporter.open()
        self.shop_product_exporter.open()

    def _export(self):
        self.batch_work_executor.execute(
            range(self.start_block, self.end_block + 1),
            self._export_batch,
            total_items=self.end_block - self.start_block + 1
        )

    def _export_batch(self, block_number_batch):
        assert len(block_number_batch) > 0
        from_block = block_number_batch[0]
        to_block = block_number_batch[-1]

        # Nothing to process if the block range is older than the V0 marketplace contract's epoch.
        if to_block < ORIGIN_MARKETPLACE_V0_BLOCK_NUMBER_EPOCH:
            return

        # Determine the version and address of the marketplace contract to query based on the block range.
        batches = []
        if to_block < ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH or from_block >= ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH:
            # The block range falls within a single version of the marketplace contract.
            version = '000' if to_block < ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH else '001'
            address = ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS if version == '000' else ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS
            batches.append({
                'contract_address': address,
                'contract_version': version,
                'from_block': from_block,
                'to_block': to_block
            })
        else:
            # The block range spans across 2 versions of the marketplace contract.
            batches.append({
                'contract_address': ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS,
                'contract_version': '000',
                'from_block': from_block,
                'to_block': ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH - 1
            })
            batches.append({
                'contract_address': ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS,
                'contract_version': '001',
                'from_block': ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH,
                'to_block': to_block
            })

        for batch in batches:
            # https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
            filter_params = {
                'address': batch['contract_address'],
                'fromBlock': batch['from_block'],
                'toBlock': batch['to_block']
            }

            if self._supports_eth_newFilter:
                try:
                    event_filter = self.web3.eth.filter(filter_params)
                    events = event_filter.get_all_entries()
                except ValueError as e:
                    if str(e) == "{'code': -32000, 'message': 'the method is currently not implemented: eth_newFilter'}":
                        self._supports_eth_newFilter = False
                        events = self.web3.eth.getLogs(filter_params)
                    else:
                        # Unrelated error: re-raise with the original traceback intact.
                        raise
            else:
                events = self.web3.eth.getLogs(filter_params)

            for event in events:
                log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
                listing, shop_products = self.event_extractor.extract_event_from_log(log, batch['contract_version'])
                if listing:
                    item = self.marketplace_listing_mapper.listing_to_dict(listing)
                    self.marketplace_listing_exporter.export_item(item)
                for product in shop_products:
                    item = self.shop_listing_mapper.product_to_dict(product)
                    self.shop_product_exporter.export_item(item)

            # Only a successfully-installed filter needs cleanup; the flag is
            # False exactly when eth_getLogs was used instead.
            if self._supports_eth_newFilter:
                self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        self.batch_work_executor.shutdown()
        self.marketplace_listing_exporter.close()
        self.shop_product_exporter.close()

View File

@@ -51,6 +51,7 @@ class ExportTokenTransfersJob(BaseJob):
self.receipt_log_mapper = EthReceiptLogMapper()
self.token_transfer_mapper = EthTokenTransferMapper()
self.token_transfer_extractor = EthTokenTransferExtractor()
self._supports_eth_newFilter = True
def _start(self):
self.item_exporter.open()
@@ -64,7 +65,7 @@ class ExportTokenTransfersJob(BaseJob):
def _export_batch(self, block_number_batch):
assert len(block_number_batch) > 0
# https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
# https://ethereum.org/en/developers/docs/apis/json-rpc/#eth_getfilterlogs
filter_params = {
'fromBlock': block_number_batch[0],
'toBlock': block_number_batch[-1],
@@ -74,15 +75,23 @@ class ExportTokenTransfersJob(BaseJob):
if self.tokens is not None and len(self.tokens) > 0:
filter_params['address'] = self.tokens
event_filter = self.web3.eth.filter(filter_params)
events = event_filter.get_all_entries()
try:
event_filter = self.web3.eth.filter(filter_params)
events = event_filter.get_all_entries()
except ValueError as e:
if str(e) == "{'code': -32000, 'message': 'the method is currently not implemented: eth_newFilter'}":
self._supports_eth_newFilter = False
events = self.web3.eth.getLogs(filter_params)
else:
raise(e)
for event in events:
log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
token_transfer = self.token_transfer_extractor.extract_transfer_from_log(log)
if token_transfer is not None:
self.item_exporter.export_item(self.token_transfer_mapper.token_transfer_to_dict(token_transfer))
self.web3.eth.uninstallFilter(event_filter.filter_id)
if self._supports_eth_newFilter:
self.web3.eth.uninstallFilter(event_filter.filter_id)
def _end(self):
self.batch_work_executor.shutdown()

View File

@@ -19,6 +19,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import logging
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
from blockchainetl.jobs.base_job import BaseJob
@@ -89,13 +90,14 @@ class ExportTracesJob(BaseJob):
json_traces = self.web3.parity.traceBlock(block_number)
if json_traces is None:
raise ValueError('Response from the node is None. Is the node fully synced?')
raise ValueError('Response from the node is None. Is the node fully synced? Is the node started with tracing enabled? Is trace_block API enabled?')
traces = [self.trace_mapper.json_dict_to_trace(json_trace) for json_trace in json_traces]
all_traces.extend(traces)
calculate_trace_statuses(all_traces)
calculate_trace_ids(all_traces)
calculate_trace_indexes(all_traces)
for trace in all_traces:
self.item_exporter.export_item(self.trace_mapper.trace_to_dict(trace))
@@ -103,3 +105,9 @@ class ExportTracesJob(BaseJob):
def _end(self):
self.batch_work_executor.shutdown()
self.item_exporter.close()
def calculate_trace_indexes(traces):
    # Assign each trace its position in the sequence.
    # Only works if traces were originally ordered correctly which is the case for Parity traces
    position = 0
    for trace in traces:
        trace.trace_index = position
        position += 1

View File

@@ -41,7 +41,12 @@ BLOCK_FIELDS_TO_EXPORT = [
'gas_limit',
'gas_used',
'timestamp',
'transaction_count'
'transaction_count',
'base_fee_per_gas',
'withdrawals_root',
'withdrawals',
'blob_gas_used',
'excess_blob_gas'
]
TRANSACTION_FIELDS_TO_EXPORT = [
@@ -56,7 +61,12 @@ TRANSACTION_FIELDS_TO_EXPORT = [
'gas',
'gas_price',
'input',
'block_timestamp'
'block_timestamp',
'max_fee_per_gas',
'max_priority_fee_per_gas',
'transaction_type',
'max_fee_per_blob_gas',
'blob_versioned_hashes'
]

View File

@@ -0,0 +1,58 @@
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
# Columns exported for origin_marketplace_listing items.
# NOTE: the original list repeated 'ipfs_hash' twice, which would emit a
# duplicate column through CompositeItemExporter; it is listed once here.
MARKETPLACE_FIELDS_TO_EXPORT = [
    'block_number',
    'log_index',
    'listing_id',
    'ipfs_hash',
    'listing_type',
    'category',
    'subcategory',
    'language',
    'title',
    'description',
    'price',
    'currency'
]
SHOP_FIELDS_TO_EXPORT = [
'block_number',
'log_index',
'listing_id',
'product_id',
'ipfs_path',
'ipfs_hash',
'external_id',
'parent_external_id',
'title',
'description',
'price',
'currency',
'option1',
'option2',
'option3',
'image'
]
def origin_marketplace_listing_item_exporter(output):
    """Create a CompositeItemExporter writing origin_marketplace_listing items to `output`."""
    item_type = 'origin_marketplace_listing'
    return CompositeItemExporter(
        filename_mapping={item_type: output},
        field_mapping={item_type: MARKETPLACE_FIELDS_TO_EXPORT},
    )
def origin_shop_product_item_exporter(output):
    """Create a CompositeItemExporter writing origin_shop_product items to `output`."""
    item_type = 'origin_shop_product'
    return CompositeItemExporter(
        filename_mapping={item_type: output},
        field_mapping={item_type: SHOP_FIELDS_TO_EXPORT},
    )

View File

@@ -32,7 +32,14 @@ RECEIPT_FIELDS_TO_EXPORT = [
'gas_used',
'contract_address',
'root',
'status'
'status',
'effective_gas_price',
'l1_fee',
'l1_gas_used',
'l1_gas_price',
'l1_fee_scalar',
'blob_gas_price',
'blob_gas_used'
]
LOG_FIELDS_TO_EXPORT = [

View File

@@ -34,12 +34,13 @@ FIELDS_TO_EXPORT = [
]
def token_transfers_item_exporter(token_transfer_output):
def token_transfers_item_exporter(token_transfer_output, converters=()):
return CompositeItemExporter(
filename_mapping={
'token_transfer': token_transfer_output
},
field_mapping={
'token_transfer': FIELDS_TO_EXPORT
}
},
converters=converters
)

View File

@@ -33,12 +33,13 @@ FIELDS_TO_EXPORT = [
]
def tokens_item_exporter(tokens_output):
def tokens_item_exporter(tokens_output, converters=()):
return CompositeItemExporter(
filename_mapping={
'token': tokens_output
},
field_mapping={
'token': FIELDS_TO_EXPORT
}
},
converters=converters
)

View File

@@ -52,6 +52,10 @@ class EthBlockMapper(object):
block.gas_limit = hex_to_dec(json_dict.get('gasLimit'))
block.gas_used = hex_to_dec(json_dict.get('gasUsed'))
block.timestamp = hex_to_dec(json_dict.get('timestamp'))
block.base_fee_per_gas = hex_to_dec(json_dict.get('baseFeePerGas'))
block.withdrawals_root = json_dict.get('withdrawalsRoot')
block.blob_gas_used = hex_to_dec(json_dict.get('blobGasUsed'))
block.excess_blob_gas = hex_to_dec(json_dict.get('excessBlobGas'))
if 'transactions' in json_dict:
block.transactions = [
@@ -62,8 +66,22 @@ class EthBlockMapper(object):
block.transaction_count = len(json_dict['transactions'])
if 'withdrawals' in json_dict:
block.withdrawals = self.parse_withdrawals(json_dict['withdrawals'])
return block
def parse_withdrawals(self, withdrawals):
    # Normalize raw JSON withdrawal dicts: numeric fields arrive hex-encoded
    # and are converted to decimal; the address is kept as-is.
    parsed = []
    for withdrawal in withdrawals:
        parsed.append({
            "index": hex_to_dec(withdrawal["index"]),
            "validator_index": hex_to_dec(withdrawal["validatorIndex"]),
            "address": withdrawal["address"],
            "amount": hex_to_dec(withdrawal["amount"]),
        })
    return parsed
def block_to_dict(self, block):
return {
'type': 'block',
@@ -85,4 +103,9 @@ class EthBlockMapper(object):
'gas_used': block.gas_used,
'timestamp': block.timestamp,
'transaction_count': block.transaction_count,
'base_fee_per_gas': block.base_fee_per_gas,
'withdrawals_root': block.withdrawals_root,
'withdrawals': block.withdrawals,
'blob_gas_used': block.blob_gas_used,
'excess_blob_gas': block.excess_blob_gas,
}

View File

@@ -0,0 +1,38 @@
class OriginMarketplaceListingMapper(object):
    """Maps an OriginMarketplaceListing domain object to a flat export dict."""

    # Exported attributes, in output column order.
    _FIELDS = (
        'listing_id', 'ipfs_hash', 'listing_type', 'category', 'subcategory',
        'language', 'title', 'description', 'price', 'currency',
        'block_number', 'log_index',
    )

    def listing_to_dict(self, listing):
        # 'type' discriminates item kinds in the shared export pipeline.
        result = {'type': 'origin_marketplace_listing'}
        for field in self._FIELDS:
            result[field] = getattr(listing, field)
        return result
class OriginShopProductMapper(object):
    """Maps an OriginShopProduct domain object to a flat export dict."""

    # Exported attributes, in output column order.
    _FIELDS = (
        'listing_id', 'product_id', 'ipfs_path', 'external_id',
        'parent_external_id', 'title', 'description', 'price', 'currency',
        'option1', 'option2', 'option3', 'image', 'block_number', 'log_index',
    )

    def product_to_dict(self, product):
        # 'type' discriminates item kinds in the shared export pipeline.
        result = {'type': 'origin_shop_product'}
        for field in self._FIELDS:
            result[field] = getattr(product, field)
        return result

View File

@@ -23,7 +23,7 @@
from ethereumetl.domain.receipt import EthReceipt
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
from ethereumetl.utils import hex_to_dec, to_normalized_address
from ethereumetl.utils import hex_to_dec, to_normalized_address, to_float_or_none
class EthReceiptMapper(object):
@@ -48,6 +48,15 @@ class EthReceiptMapper(object):
receipt.root = json_dict.get('root')
receipt.status = hex_to_dec(json_dict.get('status'))
receipt.effective_gas_price = hex_to_dec(json_dict.get('effectiveGasPrice'))
receipt.l1_fee = hex_to_dec(json_dict.get('l1Fee'))
receipt.l1_gas_used = hex_to_dec(json_dict.get('l1GasUsed'))
receipt.l1_gas_price = hex_to_dec(json_dict.get('l1GasPrice'))
receipt.l1_fee_scalar = to_float_or_none(json_dict.get('l1FeeScalar'))
receipt.blob_gas_price = hex_to_dec(json_dict.get('blobGasPrice'))
receipt.blob_gas_used = hex_to_dec(json_dict.get('blobGasUsed'))
if 'logs' in json_dict:
receipt.logs = [
self.receipt_log_mapper.json_dict_to_receipt_log(log) for log in json_dict['logs']
@@ -66,5 +75,12 @@ class EthReceiptMapper(object):
'gas_used': receipt.gas_used,
'contract_address': receipt.contract_address,
'root': receipt.root,
'status': receipt.status
'status': receipt.status,
'effective_gas_price': receipt.effective_gas_price,
'l1_fee': receipt.l1_fee,
'l1_gas_used': receipt.l1_gas_used,
'l1_gas_price': receipt.l1_gas_price,
'l1_fee_scalar': receipt.l1_fee_scalar,
'blob_gas_price': receipt.blob_gas_price,
'blob_gas_used': receipt.blob_gas_used
}

View File

@@ -190,4 +190,5 @@ class EthTraceMapper(object):
'error': trace.error,
'status': trace.status,
'trace_id': trace.trace_id,
'trace_index': trace.trace_index,
}

View File

@@ -40,6 +40,14 @@ class EthTransactionMapper(object):
transaction.gas = hex_to_dec(json_dict.get('gas'))
transaction.gas_price = hex_to_dec(json_dict.get('gasPrice'))
transaction.input = json_dict.get('input')
transaction.max_fee_per_gas = hex_to_dec(json_dict.get('maxFeePerGas'))
transaction.max_priority_fee_per_gas = hex_to_dec(json_dict.get('maxPriorityFeePerGas'))
transaction.transaction_type = hex_to_dec(json_dict.get('type'))
transaction.max_fee_per_blob_gas = hex_to_dec(json_dict.get('maxFeePerBlobGas'))
if 'blobVersionedHashes' in json_dict and isinstance(json_dict['blobVersionedHashes'], list):
transaction.blob_versioned_hashes = json_dict['blobVersionedHashes']
return transaction
def transaction_to_dict(self, transaction):
@@ -57,4 +65,9 @@ class EthTransactionMapper(object):
'gas': transaction.gas,
'gas_price': transaction.gas_price,
'input': transaction.input,
'max_fee_per_gas': transaction.max_fee_per_gas,
'max_priority_fee_per_gas': transaction.max_priority_fee_per_gas,
'transaction_type': transaction.transaction_type,
"max_fee_per_blob_gas": transaction.max_fee_per_blob_gas,
"blob_versioned_hashes": transaction.blob_versioned_hashes
}

View File

@@ -25,7 +25,7 @@ import json
import socket
from web3.providers.ipc import IPCProvider
from web3.utils.threads import (
from web3._utils.threads import (
Timeout,
)

View File

@@ -22,7 +22,7 @@
from web3 import HTTPProvider
from web3.utils.request import make_post_request
from web3._utils.request import make_post_request
# Mostly copied from web3.py/providers/rpc.py. Supports batch requests.

View File

@@ -54,7 +54,7 @@ class EthContractService:
c.implements('allowance(address,address)')
# https://github.com/ethereum/EIPs/blob/master/EIPS/eip-721.md
# https://github.com/OpenZeppelin/openzeppelin-solidity/blob/master/contracts/token/ERC721/ERC721Basic.sol
# https://github.com/OpenZeppelin/openzeppelin-contracts/blob/master/contracts/token/ERC721/ERC721.sol
# Doesn't check the below ERC721 methods to match CryptoKitties contract
# getApproved(uint256)
# setApprovalForAll(address,bool)

View File

@@ -40,7 +40,7 @@ class EthService(object):
start_timestamp = int(start_timestamp)
end_timestamp = int(end_timestamp)
if start_timestamp > end_timestamp:
raise ValueError('start_timestamp must be greater or equal to end_timestamp')
raise ValueError('start_timestamp must be lesser than end_timestamp')
try:
start_block_bounds = self._graph_operations.get_bounds_for_y_coordinate(start_timestamp)

View File

@@ -21,10 +21,10 @@
# SOFTWARE.
import logging
from web3.exceptions import BadFunctionCallOutput
from web3.exceptions import BadFunctionCallOutput, ContractLogicError
from ethereumetl.domain.token import EthToken
from ethereumetl.erc20_abi import ERC20_ABI
from ethereumetl.erc20_abi import ERC20_ABI, ERC20_ABI_ALTERNATIVE_1
logger = logging.getLogger('eth_token_service')
@@ -37,11 +37,28 @@ class EthTokenService(object):
def get_token(self, token_address):
checksum_address = self._web3.toChecksumAddress(token_address)
contract = self._web3.eth.contract(address=checksum_address, abi=ERC20_ABI)
contract_alternative_1 = self._web3.eth.contract(address=checksum_address, abi=ERC20_ABI_ALTERNATIVE_1)
symbol = self._call_contract_function(contract.functions.symbol())
name = self._call_contract_function(contract.functions.name())
decimals = self._call_contract_function(contract.functions.decimals())
total_supply = self._call_contract_function(contract.functions.totalSupply())
symbol = self._get_first_result(
contract.functions.symbol(),
contract.functions.SYMBOL(),
contract_alternative_1.functions.symbol(),
contract_alternative_1.functions.SYMBOL(),
)
if isinstance(symbol, bytes):
symbol = self._bytes_to_string(symbol)
name = self._get_first_result(
contract.functions.name(),
contract.functions.NAME(),
contract_alternative_1.functions.name(),
contract_alternative_1.functions.NAME(),
)
if isinstance(name, bytes):
name = self._bytes_to_string(name)
decimals = self._get_first_result(contract.functions.decimals(), contract.functions.DECIMALS())
total_supply = self._get_first_result(contract.functions.totalSupply())
token = EthToken()
token.address = token_address
@@ -52,13 +69,20 @@ class EthTokenService(object):
return token
def _get_first_result(self, *funcs):
    # Try each candidate contract function in order, lazily, and return the
    # first non-None result; None when every call fails or returns nothing.
    return next(
        (result for result in map(self._call_contract_function, funcs) if result is not None),
        None,
    )
def _call_contract_function(self, func):
# BadFunctionCallOutput exception happens if the token doesn't implement a particular function
# or was self-destructed
# OverflowError exception happens if the return type of the function doesn't match the expected type
result = call_contract_function(
func=func,
ignore_errors=(BadFunctionCallOutput, OverflowError, ValueError),
ignore_errors=(BadFunctionCallOutput, ContractLogicError, OverflowError, ValueError),
default_value=None)
if self._function_call_result_transformer is not None:
@@ -66,6 +90,23 @@ class EthTokenService(object):
else:
return result
def _bytes_to_string(self, b, ignore_errors=True):
    # Decode raw bytes returned by a contract call into a utf-8 string,
    # then run the configured result transformer (if any) over the value.
    if b is None:
        return b

    try:
        value = b.decode('utf-8')
    except UnicodeDecodeError as e:
        if not ignore_errors:
            raise e
        logger.debug('A UnicodeDecodeError exception occurred while trying to decode bytes to string', exc_info=True)
        value = None

    if self._function_call_result_transformer is not None:
        value = self._function_call_result_transformer(value)
    return value
def call_contract_function(func, ignore_errors, default_value=None):
try:
@@ -73,8 +114,8 @@ def call_contract_function(func, ignore_errors, default_value=None):
return result
except Exception as ex:
if type(ex) in ignore_errors:
logger.exception('An exception occurred in function {} of contract {}. '.format(func.fn_name, func.address)
+ 'This exception can be safely ignored.')
logger.debug('An exception occurred in function {} of contract {}. '.format(func.fn_name, func.address)
+ 'This exception can be safely ignored.', exc_info=True)
return default_value
else:
raise ex

View File

@@ -0,0 +1,63 @@
import base58
import logging
from ethereumetl.utils import hex_to_dec, to_normalized_address
from ethereumetl.ipfs.origin import get_origin_marketplace_data
#
LISTING_CREATED_TOPIC = '0xec3d306143145322b45d2788d826e3b7b9ad062f16e1ec59a5eaba214f96ee3c'
LISTING_UPDATED_TOPIC = '0x470503ad37642fff73a57bac35e69733b6b38281a893f39b50c285aad1f040e0'
PROCESSABLE_TOPICS = [LISTING_CREATED_TOPIC, LISTING_UPDATED_TOPIC]
TOPICS_LEN = 2
logger = logging.getLogger(__name__)
# Helper function. Converts a bytes32 hex string to a base58 encoded ipfs hash.
# For example:
# "0x017dfd85d4f6cb4dcd715a88101f7b1f06cd1e009b2327a0809d01eb9c91f231"
# --> "QmNSUYVKDSvPUnRLKmuxk9diJ6yS96r1TrAXzjTiBcCLAL"
def hex_to_ipfs_hash(param):
    # Re-attach the multihash header that was stripped to fit the digest into
    # a bytes32: 0x12 = sha2-256 function code, 0x20 = 32-byte digest length.
    data = bytearray.fromhex('1220' + param[2:])
    # Base58-encode the full multihash to obtain the familiar "Qm..." CIDv0 form.
    return base58.b58encode(data).decode()
# Helper function. Composes an Origin Protocol fully-qualified listing id.
# Its format is "<ethereum_network_id>-<contract_version>-<marketplace_listing_id>"
# For example:
# "1-001-272" refers to listing 272 on marketplace contract version 1, on Mainnet.
def compose_listing_id(network_id, contract_version, listing_id):
    # Join the three components as "<network>-<version>-<listing>".
    return "-".join(str(part) for part in (network_id, contract_version, listing_id))
class OriginEventExtractor(object):
    """Extracts Origin Protocol listing data from marketplace contract event logs."""

    def __init__(self, ipfs_client):
        self.ipfs_client = ipfs_client

    def extract_event_from_log(self, receipt_log, contract_version):
        """Process a ListingCreated/ListingUpdated log.

        :return: (marketplace_listing, shop_products) tuple; (None, []) when
            the log is not a processable Origin event or is malformed.
        """
        topics = receipt_log.topics
        if (topics is None) or (len(topics) == 0):
            logger.warning("Empty topics in log {} of transaction {}".format(
                receipt_log.log_index, receipt_log.transaction_hash))
            return None, []

        topic = topics[0]
        if topic not in PROCESSABLE_TOPICS:
            logger.debug("Skip processing event with signature {}".format(topic))
            return None, []

        # topics[2] (the indexed listing id) is accessed below, so at least 3
        # topics are required. The module-level TOPICS_LEN (=2) was too small
        # for that access and allowed an IndexError on 2-topic logs.
        if len(topics) < 3:
            logger.warning("Unexpected number of topics {} in log {} of transaction {}".format(
                len(topics),
                receipt_log.log_index,
                receipt_log.transaction_hash))
            return None, []

        listing_id = hex_to_dec(topics[2])
        ipfs_hash = hex_to_ipfs_hash(receipt_log.data)
        full_listing_id = compose_listing_id(1, contract_version, listing_id)

        marketplace_listing, shop_products = get_origin_marketplace_data(
            receipt_log, full_listing_id, self.ipfs_client, ipfs_hash)

        return marketplace_listing, shop_products

View File

@@ -37,11 +37,10 @@ class EthTokenTransferExtractor(object):
topics = receipt_log.topics
if topics is None or len(topics) < 1:
logger.warning("Topics are empty in log {} of transaction {}".format(receipt_log.log_index,
receipt_log.transaction_hash))
# This is normal, topics can be empty for anonymous events
return None
if topics[0] == TRANSFER_EVENT_TOPIC:
if (topics[0]).casefold() == TRANSFER_EVENT_TOPIC:
# Handle unindexed event fields
topics_with_data = topics + split_to_words(receipt_log.data)
# if the number of topics and fields in data part != 4, then it's a weird event

View File

@@ -73,14 +73,26 @@ def enrich_transactions(transactions, receipts):
'input',
'block_timestamp',
'block_number',
'block_hash'
'block_hash',
'max_fee_per_gas',
'max_priority_fee_per_gas',
'transaction_type',
'max_fee_per_blob_gas',
'blob_versioned_hashes'
],
right_fields=[
('cumulative_gas_used', 'receipt_cumulative_gas_used'),
('gas_used', 'receipt_gas_used'),
('contract_address', 'receipt_contract_address'),
('root', 'receipt_root'),
('status', 'receipt_status')
('status', 'receipt_status'),
('effective_gas_price', 'receipt_effective_gas_price'),
('l1_fee', 'receipt_l1_fee'),
('l1_gas_used', 'receipt_l1_gas_used'),
('l1_gas_price', 'receipt_l1_gas_price'),
('l1_fee_scalar', 'receipt_l1_fee_scalar'),
('blob_gas_price', 'receipt_blob_gas_price'),
('blob_gas_used', 'receipt_blob_gas_used')
]))
if len(result) != len(transactions):
@@ -159,7 +171,8 @@ def enrich_traces(blocks, traces):
'status',
'transaction_hash',
'block_number',
'trace_id'
'trace_id',
'trace_index'
],
[
('timestamp', 'block_timestamp'),

View File

@@ -14,7 +14,7 @@ from ethereumetl.streaming.enrich import enrich_transactions, enrich_logs, enric
from ethereumetl.streaming.eth_item_id_calculator import EthItemIdCalculator
from ethereumetl.streaming.eth_item_timestamp_calculator import EthItemTimestampCalculator
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from web3 import Web3
from ethereumetl.web3_utils import build_web3
class EthStreamerAdapter:
@@ -37,7 +37,8 @@ class EthStreamerAdapter:
self.item_exporter.open()
def get_current_block_number(self):
return int(Web3(self.batch_web3_provider).eth.getBlock("latest").number)
w3 = build_web3(self.batch_web3_provider)
return int(w3.eth.getBlock("latest").number)
def export_all(self, start_block, end_block):
# Export blocks and transactions
@@ -87,13 +88,14 @@ class EthStreamerAdapter:
logging.info('Exporting with ' + type(self.item_exporter).__name__)
all_items = enriched_blocks + \
enriched_transactions + \
enriched_logs + \
enriched_token_transfers + \
enriched_traces + \
enriched_contracts + \
enriched_tokens
all_items = \
sort_by(enriched_blocks, 'number') + \
sort_by(enriched_transactions, ('block_number', 'transaction_index')) + \
sort_by(enriched_logs, ('block_number', 'log_index')) + \
sort_by(enriched_token_transfers, ('block_number', 'log_index')) + \
sort_by(enriched_traces, ('block_number', 'trace_index')) + \
sort_by(enriched_contracts, ('block_number',)) + \
sort_by(enriched_tokens, ('block_number',))
self.calculate_item_ids(all_items)
self.calculate_item_timestamps(all_items)
@@ -150,7 +152,7 @@ class EthStreamerAdapter:
start_block=start_block,
end_block=end_block,
batch_size=self.batch_size,
web3=ThreadLocalProxy(lambda: Web3(self.batch_web3_provider)),
web3=ThreadLocalProxy(lambda: build_web3(self.batch_web3_provider)),
max_workers=self.max_workers,
item_exporter=exporter
)
@@ -174,7 +176,7 @@ class EthStreamerAdapter:
exporter = InMemoryItemExporter(item_types=['token'])
job = ExtractTokensJob(
contracts_iterable=contracts,
web3=ThreadLocalProxy(lambda: Web3(self.batch_web3_provider)),
web3=ThreadLocalProxy(lambda: build_web3(self.batch_web3_provider)),
max_workers=self.max_workers,
item_exporter=exporter
)
@@ -219,3 +221,9 @@ class EthStreamerAdapter:
def close(self):
self.item_exporter.close()
def sort_by(arr, fields):
    """Sort a list of dict items by one or more keys.

    :param arr: list of dicts to sort.
    :param fields: a single key name or a tuple of key names.
    :return: a new sorted list; missing keys sort as None.
    """
    # A bare field name must be wrapped in a tuple; otherwise the key lambda
    # would iterate the string character by character, producing all-None keys
    # and leaving the list effectively unsorted. (The original condition
    # `if isinstance(fields, tuple): fields = tuple(fields)` was a no-op.)
    if not isinstance(fields, tuple):
        fields = (fields,)
    return sorted(arr, key=lambda item: tuple(item.get(f) for f in fields))

View File

@@ -21,28 +21,52 @@
# SOFTWARE.
from blockchainetl.jobs.exporters.console_item_exporter import ConsoleItemExporter
from blockchainetl.jobs.exporters.multi_item_exporter import MultiItemExporter
def create_item_exporters(outputs):
    """Build a MultiItemExporter from a comma-separated output spec; defaults to console."""
    if outputs:
        split_outputs = [output.strip() for output in outputs.split(',')]
    else:
        split_outputs = ['console']
    return MultiItemExporter([create_item_exporter(output) for output in split_outputs])
def create_item_exporter(output):
item_exporter_type = determine_item_exporter_type(output)
if item_exporter_type == ItemExporterType.PUBSUB:
from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter
item_exporter = GooglePubSubItemExporter(item_type_to_topic_mapping={
'block': output + '.blocks',
'transaction': output + '.transactions',
'log': output + '.logs',
'token_transfer': output + '.token_transfers',
'trace': output + '.traces',
'contract': output + '.contracts',
'token': output + '.tokens',
})
enable_message_ordering = 'sorted' in output or 'ordered' in output
item_exporter = GooglePubSubItemExporter(
item_type_to_topic_mapping={
'block': output + '.blocks',
'transaction': output + '.transactions',
'log': output + '.logs',
'token_transfer': output + '.token_transfers',
'trace': output + '.traces',
'contract': output + '.contracts',
'token': output + '.tokens',
},
message_attributes=('item_id', 'item_timestamp'),
batch_max_bytes=1024 * 1024 * 5,
batch_max_latency=2,
batch_max_messages=1000,
enable_message_ordering=enable_message_ordering)
elif item_exporter_type == ItemExporterType.KINESIS:
from blockchainetl.jobs.exporters.kinesis_item_exporter import KinesisItemExporter
item_exporter = KinesisItemExporter(
stream_name=output[len('kinesis://'):],
)
elif item_exporter_type == ItemExporterType.POSTGRES:
from blockchainetl.jobs.exporters.postgres_item_exporter import PostgresItemExporter
from blockchainetl.streaming.postgres_utils import create_insert_statement_for_table
from blockchainetl.jobs.exporters.converters.unix_timestamp_item_converter import UnixTimestampItemConverter
from blockchainetl.jobs.exporters.converters.int_to_decimal_item_converter import IntToDecimalItemConverter
from blockchainetl.jobs.exporters.converters.list_field_item_converter import ListFieldItemConverter
from ethereumetl.streaming.postgres_tables import BLOCKS, TRANSACTIONS, LOGS, TOKEN_TRANSFERS, TRACES
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
from ethereumetl.streaming.postgres_tables import BLOCKS, TRANSACTIONS, LOGS, TOKEN_TRANSFERS, TRACES, TOKENS, CONTRACTS
def array_to_str(val):
return ','.join(val) if val is not None else None
item_exporter = PostgresItemExporter(
output, item_type_to_insert_stmt_mapping={
@@ -50,23 +74,62 @@ def create_item_exporter(output):
'transaction': create_insert_statement_for_table(TRANSACTIONS),
'log': create_insert_statement_for_table(LOGS),
'token_transfer': create_insert_statement_for_table(TOKEN_TRANSFERS),
'traces': create_insert_statement_for_table(TRACES),
'trace': create_insert_statement_for_table(TRACES),
'token': create_insert_statement_for_table(TOKENS),
'contract': create_insert_statement_for_table(CONTRACTS),
},
converters=[UnixTimestampItemConverter(), IntToDecimalItemConverter(),
ListFieldItemConverter('topics', 'topic', fill=4)])
converters=[
UnixTimestampItemConverter(),
IntToDecimalItemConverter(),
ListFieldItemConverter('topics', 'topic', fill=4),
SimpleItemConverter(field_converters={'blob_versioned_hashes': array_to_str})
])
elif item_exporter_type == ItemExporterType.GCS:
from blockchainetl.jobs.exporters.gcs_item_exporter import GcsItemExporter
bucket, path = get_bucket_and_path_from_gcs_output(output)
item_exporter = GcsItemExporter(bucket=bucket, path=path)
elif item_exporter_type == ItemExporterType.CONSOLE:
item_exporter = ConsoleItemExporter()
elif item_exporter_type == ItemExporterType.KAFKA:
from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter
item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={
'block': 'blocks',
'transaction': 'transactions',
'log': 'logs',
'token_transfer': 'token_transfers',
'trace': 'traces',
'contract': 'contracts',
'token': 'tokens',
})
else:
raise ValueError('Unable to determine item exporter type for output ' + output)
return item_exporter
def get_bucket_and_path_from_gcs_output(output):
    """Split a ``gs://bucket/path`` output URI into ``(bucket, path)``.

    The path component is ``''`` when the URI names only a bucket
    (with or without a trailing slash).
    """
    # Strip only the leading scheme. The previous str.replace('gs://', '')
    # removed EVERY occurrence of 'gs://', which would also mangle a path
    # that happened to contain the scheme text.
    if output.startswith('gs://'):
        output = output[len('gs://'):]
    bucket_and_path = output.split('/', 1)
    bucket = bucket_and_path[0]
    # No '/' after the bucket name means an empty object path.
    path = bucket_and_path[1] if len(bucket_and_path) > 1 else ''
    return bucket, path
def determine_item_exporter_type(output):
if output is not None and output.startswith('projects'):
return ItemExporterType.PUBSUB
if output is not None and output.startswith('kinesis://'):
return ItemExporterType.KINESIS
if output is not None and output.startswith('kafka'):
return ItemExporterType.KAFKA
elif output is not None and output.startswith('postgresql'):
return ItemExporterType.POSTGRES
elif output is not None and output.startswith('gs://'):
return ItemExporterType.GCS
elif output is None or output == 'console':
return ItemExporterType.CONSOLE
else:
@@ -75,6 +138,9 @@ def determine_item_exporter_type(output):
class ItemExporterType:
    """Symbolic names for the supported streaming output backends.

    The value is resolved from the ``--output`` URI scheme (e.g. ``gs://``
    maps to GCS, ``kinesis://`` to Kinesis); UNKNOWN marks an
    unrecognized output string.
    """
    PUBSUB = 'pubsub'
    KINESIS = 'kinesis'
    POSTGRES = 'postgres'
    GCS = 'gcs'
    CONSOLE = 'console'
    KAFKA = 'kafka'
    UNKNOWN = 'unknown'

View File

@@ -20,7 +20,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from sqlalchemy import Table, Column, Integer, BigInteger, String, Numeric, MetaData, TIMESTAMP
from sqlalchemy import Table, Column, Integer, BigInteger, Boolean, String, Numeric, \
MetaData, PrimaryKeyConstraint, VARCHAR, TIMESTAMP, Float
from sqlalchemy.dialects.postgresql import ARRAY
metadata = MetaData()
@@ -46,6 +48,10 @@ BLOCKS = Table(
Column('gas_limit', BigInteger),
Column('gas_used', BigInteger),
Column('transaction_count', BigInteger),
Column('base_fee_per_gas', BigInteger),
Column('withdrawals_root', String),
Column('blob_gas_used', BigInteger),
Column('excess_blob_gas', BigInteger),
)
TRANSACTIONS = Table(
@@ -67,6 +73,18 @@ TRANSACTIONS = Table(
Column('block_timestamp', TIMESTAMP),
Column('block_number', BigInteger),
Column('block_hash', String),
Column('max_fee_per_gas', BigInteger),
Column('max_priority_fee_per_gas', BigInteger),
Column('transaction_type', BigInteger),
Column('receipt_effective_gas_price', BigInteger),
Column('receipt_l1_fee', BigInteger),
Column('receipt_l1_gas_used', BigInteger),
Column('receipt_l1_gas_price', BigInteger),
Column('receipt_l1_fee_scalar', Float),
Column('max_fee_per_blob_gas', BigInteger),
Column('blob_versioned_hashes', String),
Column('receipt_blob_gas_price', BigInteger),
Column('receipt_blob_gas_used', BigInteger),
)
LOGS = Table(
@@ -122,4 +140,25 @@ TRACES = Table(
Column('trace_id', String, primary_key=True),
)
# Token metadata rows (name/symbol/decimals/total_supply).
# The composite key (address, block_number) allows the same token address to
# be re-recorded at different blocks as mutable fields change.
TOKENS = Table(
    'tokens', metadata,
    # 42-char hex address: '0x' + 40 hex digits.
    Column('address', VARCHAR(42)),
    Column('name', String),
    Column('symbol', String),
    Column('decimals', Integer),
    Column('function_sighashes', ARRAY(String)),
    # Numeric(78) covers the full uint256 range (max value has 78 decimal digits).
    Column('total_supply', Numeric(78)),
    Column('block_number', BigInteger),
    PrimaryKeyConstraint('address', 'block_number', name='tokens_pk'),
)
# Deployed-contract rows (bytecode plus interface-detection flags).
# Keyed by (address, block_number) so a redeployment at the same address
# (e.g. after self-destruct) can be stored per block.
CONTRACTS = Table(
    'contracts', metadata,
    # 42-char hex address: '0x' + 40 hex digits.
    Column('address', VARCHAR(42)),
    Column('bytecode', String),
    Column('function_sighashes', ARRAY(String)),
    Column('is_erc20', Boolean),
    Column('is_erc721', Boolean),
    Column('block_number', BigInteger),
    PrimaryKeyConstraint('address', 'block_number', name='contracts_pk'),
)

View File

@@ -47,6 +47,16 @@ def to_int_or_none(val):
except ValueError:
return None
def to_float_or_none(val):
    """Best-effort conversion of ``val`` to float.

    Floats pass through unchanged; ``None`` and the empty string map to
    ``None``; any value ``float()`` rejects is logged to stdout and
    returned as-is rather than dropped.
    """
    if isinstance(val, float):
        return val
    if val is None or val == "":
        return None
    try:
        result = float(val)
    except ValueError:
        # Preserve the original value so callers can still see/store it.
        print("can't cast %s to float" % val)
        return val
    return result
def chunk_string(string, length):
    """Lazily yield consecutive substrings of at most ``length`` characters.

    The final chunk may be shorter when ``len(string)`` is not a multiple
    of ``length``; an empty string yields nothing.
    """
    for start in range(0, len(string), length):
        yield string[start:start + length]

30
ethereumetl/web3_utils.py Normal file
View File

@@ -0,0 +1,30 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from web3 import Web3
from web3.middleware import geth_poa_middleware
def build_web3(provider):
    """Wrap ``provider`` in a Web3 client with the geth PoA middleware.

    The middleware is injected at the innermost layer (0).
    NOTE(review): it is injected unconditionally for every provider —
    confirm this is intended for non-PoA chains as well.
    """
    web3 = Web3(provider)
    web3.middleware_onion.inject(geth_poa_middleware, layer=0)
    return web3

View File

@@ -13,5 +13,7 @@ nav:
- Project:
- Contact Us: contact.md
- Media: media.md
- Citing: citing.md
theme: readthedocs
repo_url: https://github.com/blockchain-etl/ethereum-etl/
edit_uri: edit/develop/docs

View File

@@ -16,19 +16,11 @@ CREATE EXTERNAL TABLE IF NOT EXISTS blocks (
gas_limit BIGINT,
gas_used BIGINT,
timestamp BIGINT,
transaction_count BIGINT
transaction_count BIGINT,
base_fee_per_gas BIGINT
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/blocks'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
PARTITIONED BY (block_date STRING)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3://<your_bucket>/export/blocks/';
MSCK REPAIR TABLE blocks;
MSCK REPAIR TABLE blocks;

View File

@@ -5,7 +5,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS contracts (
is_erc20 BOOLEAN,
is_erc721 BOOLEAN
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',

View File

@@ -6,19 +6,10 @@ CREATE EXTERNAL TABLE IF NOT EXISTS logs (
block_number BIGINT,
address STRING,
data STRING,
topics STRING
topics ARRAY<STRING>
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/logs'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
PARTITIONED BY (block_date STRING)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3://<your_bucket>/export/logs/';
MSCK REPAIR TABLE logs;

View File

@@ -7,19 +7,15 @@ CREATE EXTERNAL TABLE IF NOT EXISTS receipts (
gas_used BIGINT,
contract_address STRING,
root STRING,
status BIGINT
status BIGINT,
effective_gas_price BIGINT,
l1_fee BIGINT,
l1_gas_used BIGINT,
l1_gas_price BIGINT,
l1_fee_scalar DECIMAL
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/receipts'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
PARTITIONED BY (block_date STRING)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3://<your_bucket>/export/receipts/';
MSCK REPAIR TABLE receipts;

View File

@@ -2,22 +2,13 @@ CREATE EXTERNAL TABLE IF NOT EXISTS token_transfers (
token_address STRING,
from_address STRING,
to_address STRING,
value DECIMAL(38,0),
value STRING,
transaction_hash STRING,
log_index BIGINT,
block_number BIGINT
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/token_transfers'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
PARTITIONED BY (block_date STRING)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3://<your_bucket>/export/token_transfers/';
MSCK REPAIR TABLE token_transfers;
MSCK REPAIR TABLE token_transfers;

View File

@@ -5,7 +5,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS tokens (
decimals BIGINT,
total_supply DECIMAL(38,0)
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',

View File

@@ -9,19 +9,13 @@ CREATE EXTERNAL TABLE IF NOT EXISTS transactions (
value DECIMAL(38,0),
gas BIGINT,
gas_price BIGINT,
input STRING
input STRING,
max_fee_per_gas BIGINT,
max_priority_fee_per_gas BIGINT,
transaction_type BIGINT
)
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/transactions'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
PARTITIONED BY (block_date STRING)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3://<your_bucket>/export/transactions/';
MSCK REPAIR TABLE transactions;
MSCK REPAIR TABLE transactions;

View File

@@ -1,34 +0,0 @@
-- External table over the CSV block export (one row per block).
-- The SerDe settings must match the exporter's CSV output exactly:
-- comma-delimited, backslash-escaped, with a single header row skipped.
CREATE EXTERNAL TABLE IF NOT EXISTS blocks (
number BIGINT,
hash STRING,
parent_hash STRING,
nonce STRING,
sha3_uncles STRING,
logs_bloom STRING,
transactions_root STRING,
state_root STRING,
receipts_root STRING,
miner STRING,
difficulty DECIMAL(38,0),
total_difficulty DECIMAL(38,0),
size BIGINT,
extra_data STRING,
gas_limit BIGINT,
gas_used BIGINT,
timestamp BIGINT,
transaction_count BIGINT
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/blocks'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE blocks;

View File

@@ -1,21 +0,0 @@
-- External table over the CSV contract export (address, bytecode, and
-- interface-detection flags). SerDe settings must match the exporter's CSV.
CREATE EXTERNAL TABLE IF NOT EXISTS contracts (
address STRING,
bytecode STRING,
-- NOTE(review): stored as a single STRING here (not ARRAY) — presumably a
-- delimited list of 4-byte selectors; confirm against the exporter format.
function_sighashes STRING,
is_erc20 BOOLEAN,
is_erc721 BOOLEAN
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/contracts'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE contracts;

View File

@@ -1,24 +0,0 @@
-- External table over the CSV event-log export (one row per log entry).
-- SerDe settings must match the exporter's CSV output.
CREATE EXTERNAL TABLE IF NOT EXISTS logs (
log_index BIGINT,
transaction_hash STRING,
transaction_index BIGINT,
block_hash STRING,
block_number BIGINT,
address STRING,
data STRING,
-- NOTE(review): topics kept as a single STRING in the CSV form — presumably
-- a delimited list; confirm against the exporter format.
topics STRING
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/logs'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE logs;

View File

@@ -1,25 +0,0 @@
-- Parquet variant of the blocks table; no SerDe/header settings needed
-- because the schema is carried by the Parquet files themselves.
CREATE EXTERNAL TABLE IF NOT EXISTS parquet_blocks (
number BIGINT,
hash STRING,
parent_hash STRING,
nonce STRING,
sha3_uncles STRING,
logs_bloom STRING,
transactions_root STRING,
state_root STRING,
receipts_root STRING,
miner STRING,
difficulty DECIMAL(38,0),
total_difficulty DECIMAL(38,0),
size BIGINT,
extra_data STRING,
gas_limit BIGINT,
gas_used BIGINT,
timestamp BIGINT,
transaction_count BIGINT
)
-- Partitioned by the export batch's block range rather than by date.
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
STORED AS PARQUET
LOCATION 's3://<your_bucket>/ethereumetl/parquet/blocks';
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE parquet_blocks;

View File

@@ -1,14 +0,0 @@
-- Parquet variant of the token_transfers table (one row per ERC-20/721
-- Transfer event); schema is carried by the Parquet files.
CREATE EXTERNAL TABLE IF NOT EXISTS parquet_token_transfers (
token_address STRING,
from_address STRING,
to_address STRING,
-- NOTE(review): DECIMAL(38,0) cannot hold the full uint256 range (up to
-- 78 decimal digits) — confirm how out-of-range values are handled.
value DECIMAL(38,0),
transaction_hash STRING,
log_index BIGINT,
block_number BIGINT
)
-- Partitioned by the export batch's block range rather than by date.
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
STORED AS PARQUET
LOCATION 's3://<your_bucket>/ethereumetl/parquet/token_transfers';
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE parquet_token_transfers;

View File

@@ -1,18 +0,0 @@
-- Parquet variant of the transactions table (one row per transaction);
-- schema is carried by the Parquet files.
CREATE EXTERNAL TABLE IF NOT EXISTS parquet_transactions (
hash STRING,
nonce BIGINT,
block_hash STRING,
block_number BIGINT,
transaction_index BIGINT,
from_address STRING,
to_address STRING,
-- NOTE(review): DECIMAL(38,0) cannot hold the full uint256 range (up to
-- 78 decimal digits) — confirm how out-of-range values are handled.
value DECIMAL(38,0),
gas BIGINT,
gas_price BIGINT,
input STRING
)
-- Partitioned by the export batch's block range rather than by date.
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
STORED AS PARQUET
LOCATION 's3://<your_bucket>/ethereumetl/parquet/transactions';
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE parquet_transactions;

View File

@@ -1,25 +0,0 @@
-- External table over the CSV receipt export (one row per transaction
-- receipt). SerDe settings must match the exporter's CSV output.
CREATE EXTERNAL TABLE IF NOT EXISTS receipts (
transaction_hash STRING,
transaction_index BIGINT,
block_hash STRING,
block_number BIGINT,
cumulative_gas_used BIGINT,
gas_used BIGINT,
contract_address STRING,
root STRING,
status BIGINT
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/receipts'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE receipts;

View File

@@ -1,23 +0,0 @@
-- External table over the CSV token-transfer export (one row per Transfer
-- event). SerDe settings must match the exporter's CSV output.
CREATE EXTERNAL TABLE IF NOT EXISTS token_transfers (
token_address STRING,
from_address STRING,
to_address STRING,
-- NOTE(review): DECIMAL(38,0) cannot hold the full uint256 range (up to
-- 78 decimal digits) — confirm how out-of-range values are handled.
value DECIMAL(38,0),
transaction_hash STRING,
log_index BIGINT,
block_number BIGINT
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/token_transfers'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE token_transfers;

View File

@@ -1,21 +0,0 @@
-- External table over the CSV token-metadata export (one row per token).
-- SerDe settings must match the exporter's CSV output.
CREATE EXTERNAL TABLE IF NOT EXISTS tokens (
address STRING,
symbol STRING,
name STRING,
decimals BIGINT,
-- NOTE(review): DECIMAL(38,0) cannot hold the full uint256 range (up to
-- 78 decimal digits) — confirm how out-of-range supplies are handled.
total_supply DECIMAL(38,0)
)
-- 'date' is a partition column taken from the S3 key layout, not the CSV data.
PARTITIONED BY (date STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = ',',
'field.delim' = ',',
'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/tokens'
TBLPROPERTIES (
'skip.header.line.count' = '1'
);
-- Register partitions already present under the LOCATION prefix.
MSCK REPAIR TABLE tokens;

Some files were not shown because too many files have changed in this diff Show More