Compare commits

..

256 Commits

Author SHA1 Message Date
medvedev1088
974b2bedd1 Script for exporting balances 2021-06-24 13:58:33 +07:00
medvedev1088
ecc4484034 Enable message ordering if topic name contains sorted 2021-06-22 18:58:26 +07:00
medvedev1088
48f11fc9e1 Add export to GCS and message ordering in pubsub 2021-01-09 19:34:53 +07:00
medvedev1088
511b60ecfa Enable message ordering for pubsub exporter 2020-12-06 19:42:29 +07:00
medvedev1088
fcf576f6bc Add link to Ethereum 2.0 ETL to README 2020-10-26 19:13:03 +07:00
medvedev1088
15b0f683b9 Add Programming Language Python 3.8 to setup.py 2020-10-09 17:17:42 +07:00
medvedev1088
742e78b7f7 Bump version 2020-10-09 17:14:57 +07:00
Evgeny Medvedev
68f6bec10b Merge pull request #225 from blockchain-etl/feature/python38
Add py38 in setup.py and tests
2020-10-09 17:13:31 +07:00
medvedev1088
04b179aadf Add py38 setup.py and tests 2020-10-09 17:02:00 +07:00
medvedev1088
8d159a58c0 Update docs 2020-10-03 18:27:13 +07:00
medvedev1088
10087aecbb Remove latest tag from dockerhub workflow 2020-08-21 20:42:25 +07:00
medvedev1088
e340074ce6 Bump the version 2020-08-21 20:05:04 +07:00
Evgeny Medvedev
a74f53f351 Merge pull request #222 from blockchain-etl/bug/tokens_param_recognizes_single_value
Fix --tokens in export_token_transfers.py recognizes only 1 parameter
2020-08-21 20:03:08 +07:00
medvedev1088
e61248e798 Fix --tokens in export_token_transfers.py recognizes only 1 parameter 2020-08-21 19:43:19 +07:00
medvedev1088
e78a856438 Update Infura id 2020-08-14 19:35:39 +07:00
medvedev1088
40b98215b6 Update citing.md 2020-07-22 15:01:57 +07:00
medvedev1088
c19bdf053f Fix extra comma in citing.md 2020-07-22 00:10:13 +07:00
medvedev1088
8ccb6dfe77 Merge remote-tracking branch 'origin/develop' into develop 2020-07-22 00:08:40 +07:00
medvedev1088
4ce02de2e0 Add Citing section to the docs 2020-07-22 00:08:28 +07:00
Evgeny Medvedev
56d232781a Merge pull request #214 from franckc/originprotocol
Add support for extracting Origin Protocol data
2020-06-14 19:54:21 +07:00
Franck Chastagnol
c5a67b0fd4 Use null rather than empty string as default for shop product fields 2020-06-08 09:39:20 -07:00
Franck Chastagnol
2498bf5560 Fix unit tests 2020-06-07 23:21:07 -07:00
Franck Chastagnol
4c0a06fc36 Minor fixes 2020-06-07 22:45:31 -07:00
Franck Chastagnol
101f0dbd67 Add dependency on requests package 2020-06-07 22:09:32 -07:00
Franck Chastagnol
bc40a13ec6 Merge branch 'develop' into originprotocol 2020-06-07 22:01:42 -07:00
Franck Chastagnol
1bca49b31f Clean up, Add unit tests 2020-06-07 21:55:07 -07:00
Evgeny Medvedev
8df8407137 Merge pull request #217 from blockchain-etl/fix/update-nansen-url
Updated Nansen link in docs
2020-05-25 23:40:51 +07:00
askeluv
4958c1e264 Updated Nansen link in docs 2020-05-25 18:00:01 +02:00
medvedev1088
60f5340754 Add a link to Ivan on Tech video 2020-05-23 16:59:00 +07:00
Franck Chastagnol
c84a6d1195 Extract origin protocol data 2020-05-15 19:11:52 -07:00
Evgeny Medvedev
04bc4a888b Merge pull request #212 from blockchain-etl/fix/update-project-links
Updated Nansen link + added projects to README.md
2020-05-05 21:01:24 +07:00
askeluv
84886c7f48 Updated Nansen link + added projects to README.md 2020-05-05 15:44:44 +02:00
Evgeny Medvedev
c1e5691d1d Merge pull request #210 from blockchain-etl/feature/publish_to_dockerhub_workflow
Add publish-to-dockerhub.yml
2020-04-21 21:55:43 +07:00
medvedev1088
16dfcb24ed Fix organization name in publish-to-dockerhub.yml 2020-04-16 23:58:41 +07:00
medvedev1088
8164ee105d Add publish-to-dockerhub.yml 2020-04-16 23:49:12 +07:00
medvedev1088
ac866f6459 Update README 2020-04-16 23:25:30 +07:00
medvedev1088
90c4982a6b Add Infura project id to commands in docs 2020-04-16 23:23:17 +07:00
medvedev1088
ae131baa0e Update docs 2020-04-16 23:09:32 +07:00
medvedev1088
cb3ee69123 Bump the version 2020-04-16 23:06:04 +07:00
Evgeny Medvedev
81374dea00 Merge pull request #209 from blockchain-etl/feature/publish_to_pypi_workflow
Feature/publish to pypi workflow
2020-04-16 23:04:45 +07:00
medvedev1088
71364a4fea Run pypi workflow only on tags push 2020-04-16 23:04:26 +07:00
medvedev1088
d612ba40b8 Add publishing to pypi 2020-04-16 23:00:57 +07:00
medvedev1088
e853d4fd19 Fix file formatting 2020-04-16 22:47:01 +07:00
medvedev1088
82045cc21c Add publish to PyPi workflow 2020-04-16 22:44:41 +07:00
Evgeny Medvedev
eeabd57b98 Merge pull request #208 from obsh/develop
Add block_timestamp attribute to exported PubSub message
2020-04-16 21:59:13 +07:00
oleksandr.bushkovskyi
dec070e812 Update streaming tests to take into account item_timestamp attribute 2020-04-16 00:31:18 +03:00
oleksandr.bushkovskyi
156b603cb0 Add item_timestamp attribute in RFC 3339 format to exported PubSub messages 2020-04-15 23:48:23 +03:00
oleksandr.bushkovskyi
141c82005a Add block_timestamp attribute to exported PubSub message 2020-04-09 00:20:09 +03:00
medvedev1088
e0636bbb31 Bump version 2020-04-04 00:06:08 +07:00
Evgeny Medvedev
b65f37af7b Merge pull request #207 from blockchain-etl/feature/update_dependency_versions
Update eth-utils and eth-abi versions
2020-04-04 00:00:58 +07:00
medvedev1088
1a6c417ab0 Update eth-utils and eth-abi versions 2020-04-03 23:58:00 +07:00
medvedev1088
5a09102eb2 Update schema.md 2020-03-17 23:55:44 +07:00
medvedev1088
f05ce47b95 Update commands.md 2020-03-13 22:31:08 +07:00
Evgeny Medvedev
51927defc7 Merge pull request #203 from blockchain-etl/feature/postgres
Postgres support for stream command
2020-03-13 22:13:42 +07:00
medvedev1088
4b92d7b670 Bump the version 2020-03-13 21:42:31 +07:00
medvedev1088
0c4342fe11 Add trace_id to traces postgres table 2020-03-13 21:37:45 +07:00
medvedev1088
41f20435a2 Update docs 2020-03-09 20:37:38 +07:00
medvedev1088
676dfb22c5 Update commands.md 2020-03-09 17:00:00 +07:00
medvedev1088
7cf0f34785 Validate entity types for streaming 2020-03-09 16:59:49 +07:00
medvedev1088
d46528ba24 Rename ListFieldItemConverter 2020-03-09 14:55:54 +07:00
medvedev1088
87e6b57024 Add transactions, logs, traces support for postgres exporter 2020-03-06 16:42:10 +07:00
medvedev1088
70db781856 Add BLOCKS table schema 2020-03-05 22:31:21 +07:00
medvedev1088
f5836345cd Remove Dockerfile_with_streaming from README 2020-03-05 22:15:38 +07:00
medvedev1088
d7ac8fb758 Add PostgresItemExporter 2020-03-05 21:50:55 +07:00
medvedev1088
ded7a6a007 Merge branch 'develop' into feature/postgres 2020-03-05 19:10:44 +07:00
medvedev1088
dd8d2bdc38 Update links to docs 2020-03-05 13:10:01 +07:00
medvedev1088
093fe56dde Add postgres dependencies 2020-03-05 13:07:58 +07:00
Evgeny Medvedev
68fce399a8 Merge pull request #202 from blockchain-etl/feature/add-docs
Add docs folder using mkdocs
2020-03-05 12:52:45 +07:00
medvedev1088
438b911b0f Remove Tests and Running in Docker from Useful Links 2020-03-04 19:59:41 +07:00
medvedev1088
dae8deff36 Add Documentation to Useful Links 2020-03-04 19:58:12 +07:00
medvedev1088
cb84071680 Add link to Awesome BigQuery Views 2020-03-04 19:56:39 +07:00
medvedev1088
d882c64671 Add more Media links 2020-03-04 19:53:16 +07:00
medvedev1088
477eb35a39 Merge branch 'develop' into feature/add-docs
# Conflicts:
#	README.md
2020-03-04 19:47:09 +07:00
medvedev1088
ab7fd89774 Update README 2020-03-04 19:45:39 +07:00
medvedev1088
064353a993 Move Running Tests and Running in Docker to README as they depend on repo contents 2020-03-04 19:42:32 +07:00
medvedev1088
94b7ce8a4c Add Useful Links to README 2020-03-04 19:34:00 +07:00
askeluv
c8e4c840d5 Minor tweaks to documentation 2020-02-26 16:52:11 +01:00
askeluv
136ed3232a Fixed internal linking 2020-02-26 16:30:44 +01:00
askeluv
cf8c6edfb7 Testing different way to do internal linking 2020-02-26 16:25:24 +01:00
askeluv
e90e70e94f Fix broken link to /commands 2020-02-26 16:22:57 +01:00
askeluv
64614c2670 Fixed broken link 2020-02-25 17:06:13 +01:00
askeluv
1ffb592771 Minor tweaks; brought back README.md quickstart 2020-02-25 17:03:04 +01:00
askeluv
030c460f36 Fixed links 2020-02-25 16:11:51 +01:00
askeluv
92db79b8a7 Moved documentation from README.md into mkdocs docs/ folder 2020-02-25 15:48:07 +01:00
medvedev1088
2d37486970 Add link to ConsenSys Grants announcement 2020-02-23 18:00:48 +07:00
Evgeny Medvedev
499596ad3e Merge pull request #198 from blockchain-etl/feature/remove_dockerfile_with_streaming
Remove Dockerfile_with_streaming to avoid confusion
2020-02-14 14:26:08 +07:00
medvedev1088
106de42844 Remove -streaming suffix from docker tags 2020-02-14 14:25:48 +07:00
medvedev1088
aa106467b8 Remove Dockerfile_with_streaming to avoid confusion 2020-02-14 14:21:00 +07:00
Evgeny Medvedev
e53dbe13f9 Merge pull request #197 from blockchain-etl/feature/show_defaults_in_help
Show default values for click commands when using --help
2020-02-09 17:42:00 +07:00
medvedev1088
38752a557a Show default values for click commands when using --help 2020-02-09 17:36:28 +07:00
medvedev1088
e8b6fe742e Update FUNDING.yaml 2020-01-09 16:22:57 +07:00
medvedev1088
d1e2f83071 Update README 2019-12-16 11:04:57 +07:00
Evgeny Medvedev
69c64e048e Update README.md 2019-12-15 00:03:35 +07:00
medvedev1088
1d4aa94d81 Add link to Discord 2019-12-14 23:52:32 +07:00
Evgeny Medvedev
2b23e08a64 Merge pull request #189 from blockchain-etl/feature/log_token_address_on_exception
Add function name and contract address in log message
2019-10-23 19:05:37 +07:00
medvedev1088
7434d149bb Add function name and contract address in log message when function call failed. Related to https://github.com/blockchain-etl/ethereum-etl/issues/159#issuecomment-526910436 2019-09-01 20:01:01 +07:00
Evgeny Medvedev
eab288d507 Add link to Kaggle dataset 2019-08-09 19:42:56 +07:00
Evgeny Medvedev
091c7edd60 Add link to Snowflake tutorial 2019-08-09 19:35:43 +07:00
Evgeny Medvedev
0373f48956 Update link to useful queries 2019-07-27 20:34:33 +07:00
Evgeny Medvedev
32eae84170 Add link to useful queries 2019-07-27 20:24:27 +07:00
Evgeny Medvedev
359fe17ac3 Merge pull request #176 from blockchain-etl/funding
Create FUNDING.yml
2019-07-25 19:24:12 +07:00
Evgeny Medvedev
19daa86e52 Merge pull request #185 from blockchain-etl/develop
develop to master
2019-07-25 19:22:36 +07:00
Evgeny Medvedev
e428bead6d Update FUNDING.yml 2019-07-25 19:21:38 +07:00
Evgeny Medvedev
ee5de4b465 Merge branch 'develop' into funding 2019-07-25 19:20:17 +07:00
Evgeny Medvedev
ee8c68d215 Merge branch 'master' into develop 2019-07-25 19:18:42 +07:00
Evgeny Medvedev
76cdec4a5c Merge branch 'develop' into funding 2019-07-25 19:14:56 +07:00
Evgeny Medvedev
7d9892de85 Merge pull request #184 from blockchain-etl/feature/item_id
Add possibility to specify multiple provider uris for streaming
2019-07-25 19:09:20 +07:00
Evgeny Medvedev
faffca21ef Create FUNDING.yml 2019-06-13 16:41:18 +07:00
Evgeny Medvedev
a74ab02563 Merge pull request #170 from blockchain-etl/bug/153
Fix https://github.com/blockchain-etl/ethereum-etl/issues/153
2019-06-05 19:02:29 +07:00
Evgeny Medvedev
8daa06d007 Rename constant in batch_work_executor.py 2019-06-05 19:01:56 +07:00
Evgeny Medvedev
2ab3b7e9bf Fix https://github.com/blockchain-etl/ethereum-etl/issues/153 2019-06-05 18:57:26 +07:00
Evgeny Medvedev
3234f64c45 Add possibility to specify multiple provider uris for streaming 2019-05-10 19:18:37 +07:00
Evgeny Medvedev
437718083e Merge pull request #172 from blockchain-etl/feature/item_id
Add item_id to stream output
2019-05-08 20:20:01 +07:00
Evgeny Medvedev
0f28aee915 Bump version 2019-05-08 20:14:48 +07:00
Evgeny Medvedev
5e311b87da Fix google_pubsub_item_exporter.py 2019-05-08 20:12:16 +07:00
Evgeny Medvedev
fdea8ca36e Add item_id to streamer messages 2019-05-07 21:05:08 +07:00
Evgeny Medvedev
ca8cd55223 Fix https://github.com/blockchain-etl/ethereum-etl/issues/153 2019-04-25 21:06:23 +07:00
Evgeny Medvedev
f4586b1501 Update README 2019-04-22 22:14:07 +07:00
Evgeny Medvedev
f49b46363e Update README 2019-04-22 22:11:37 +07:00
Evgeny Medvedev
40d4cf374c Fix variable name 2019-04-19 20:09:56 +07:00
Evgeny Medvedev
031c5acedf Update README 2019-04-17 18:47:18 +07:00
Evgeny Medvedev
f4718a6cb9 Added link to D5 2019-04-17 18:36:35 +07:00
Evgeny Medvedev
f35b4ecde4 Update README 2019-04-16 01:12:15 +07:00
Evgeny Medvedev
8257c4bde5 Update README 2019-04-16 00:57:48 +07:00
Evgeny Medvedev
8b21e34250 Update README 2019-04-16 00:29:04 +07:00
Evgeny Medvedev
e8ea43067a Update README 2019-04-16 00:22:35 +07:00
Evgeny Medvedev
e695c55704 Merge pull request #160 from blockchain-etl/feature/streaming
Feature/streaming
2019-04-15 20:18:02 +07:00
Evgeny Medvedev
5c941a403e Bump version 2019-04-15 20:10:57 +07:00
Evgeny Medvedev
67b9ef1728 Refactor dockerhub.md 2019-04-15 20:10:44 +07:00
Evgeny Medvedev
3d5c5a3c73 Update README 2019-04-15 20:10:32 +07:00
Evgeny Medvedev
fa81a41ae5 Refactoring 2019-04-15 19:02:30 +07:00
Evgeny Medvedev
fcd963ced6 Update README 2019-04-15 18:38:34 +07:00
Evgeny Medvedev
e69148ca9e Update README 2019-04-15 18:08:45 +07:00
Evgeny Medvedev
143f59018f Merge branch 'develop' into feature/streaming
# Conflicts:
#	tests/ethereumetl/job/mock_web3_provider.py
2019-04-13 21:57:11 +07:00
Evgeny Medvedev
b46717bf2b Revert changing test file names 2019-04-13 21:56:53 +07:00
Evgeny Medvedev
66971c82e8 Revert using traceFilter https://github.com/blockchain-etl/ethereum-etl/pull/164#issuecomment-482814833 2019-04-13 21:55:48 +07:00
Evgeny Medvedev
040a42dba5 Change block enrichment in eth_streamer_adapter.py 2019-04-13 21:18:48 +07:00
Evgeny Medvedev
2e0b59553c Fix test file names 2019-04-13 21:15:41 +07:00
Evgeny Medvedev
26bcb6c9d8 Merge branch 'develop' into feature/streaming
# Conflicts:
#	tests/ethereumetl/job/mock_web3_provider.py
2019-04-13 21:10:14 +07:00
Evgeny Medvedev
e82618d1c2 Change default value for --batch-size in export_traces.py 2019-04-13 21:09:28 +07:00
Evgeny Medvedev
e6c055c3fa Merge pull request #164 from t2y/use-trace-filter
Use traceFilter instead of traceBlock
2019-04-13 21:07:08 +07:00
Evgeny Medvedev
925471b064 Change default value for block_timestamp in transaction_mapper.py 2019-04-13 21:02:52 +07:00
Evgeny Medvedev
af72640c37 Merge pull request #163 from t2y/add-block-timestamp-to-transaction
Add block timestamp to transactions.csv
2019-04-13 20:55:29 +07:00
Tetsuya Morimoto
a44637f430 change block_timestamp column position to last column to minimize breaking compatibility 2019-04-13 19:14:48 +09:00
Tetsuya Morimoto
a446b55453 add block timestamp to transactions.csv 2019-04-12 20:48:15 +09:00
Evgeny Medvedev
9072abf55d Fix filename capitalization 2 2019-04-09 22:35:18 +07:00
Evgeny Medvedev
c6118be5a5 Fix filename capitalization 1 2019-04-09 22:33:18 +07:00
Evgeny Medvedev
4ed17d4980 Refactor mock file naming 2019-04-09 22:25:22 +07:00
Evgeny Medvedev
1bf2553aed Fix tests 2019-04-09 21:58:18 +07:00
Evgeny Medvedev
04b34c5dd5 Add link to stackoverflow question 2019-04-09 19:13:00 +07:00
Evgeny Medvedev
9614aeba7f Fix exception when only log specified for -e option 2019-04-09 14:55:34 +07:00
Tetsuya Morimoto
eba4e4e58e applied a reverse patch from 0b3f4d6 since it seems paritytech/parity-ethereum/issues/9822 was fixed 2019-04-09 08:59:54 +09:00
Evgeny Medvedev
c5d155b617 Fix trace status calculation 2019-04-07 21:58:18 +07:00
Evgeny Medvedev
418b7a83d3 Fix timeout error handling 2019-04-07 12:04:46 +07:00
Evgeny Medvedev
4fccd2c181 Fix trace status 2019-04-07 12:04:25 +07:00
Evgeny Medvedev
f07752907a Add extract_tokens command 2019-04-06 20:32:26 +07:00
Evgeny Medvedev
140af3e649 Fix csv max field size in extract_contracts 2019-04-06 15:09:19 +07:00
Evgeny Medvedev
c9fa2a1873 Add pid file to streamer 2019-04-05 14:20:32 +07:00
Evgeny Medvedev
7214d771b9 Increase timeout 2019-04-02 18:08:30 +07:00
Evgeny Medvedev
a2a48f9642 Fix timeout in pubsub exporter 2019-04-02 17:58:08 +07:00
Evgeny Medvedev
ad8fda002e Merge branch 'develop' into feature/streaming 2019-04-02 14:02:55 +07:00
Evgeny Medvedev
99803a772e Disable slow tests in tox 2019-04-01 17:31:29 +07:00
Evgeny Medvedev
1defa289e5 Use comma-separated list for --entity-types option 2019-04-01 14:08:06 +07:00
Evgeny Medvedev
7f725182aa Merge pull request #161 from SteveVitali/patch-1
Update export_all.sh with ethereumetl commands
2019-04-01 14:06:40 +07:00
Steven Vitali
7afe6093b0 Update export_all.sh with ethereumetl commands 2019-04-01 02:58:04 -04:00
Evgeny Medvedev
4465222622 Refactor blockchainetl package 2019-03-30 15:20:26 +07:00
Evgeny Medvedev
2f8d901829 Refactor streamer 2019-03-30 15:12:34 +07:00
Evgeny Medvedev
e27b5c28fd Fix the tests 2019-03-28 17:35:01 +07:00
Evgeny Medvedev
47bd5957d4 Fix tests 2019-03-28 01:12:52 +07:00
Evgeny Medvedev
edc3211544 Fix extract_contracts job 2019-03-28 01:07:45 +07:00
Evgeny Medvedev
a9ee19f871 Update README 2019-03-27 23:33:35 +07:00
Evgeny Medvedev
c5ea25a200 Add timeout for sync cycle 2019-03-27 23:20:15 +07:00
Evgeny Medvedev
81033022b9 Update pubsub exporter 2019-03-27 22:21:28 +07:00
Evgeny Medvedev
ac60502f72 Configure logging 2019-03-27 22:07:16 +07:00
Evgeny Medvedev
9dfff1261d Add extract_contracts command 2019-03-27 21:23:21 +07:00
Evgeny Medvedev
69cc8a70c0 Add trace status calculation 2019-03-27 21:11:54 +07:00
Evgeny Medvedev
ba60c906f5 Add tests for streaming traces 2019-03-27 16:00:28 +07:00
Evgeny Medvedev
751f9b57ac Add entity types 2019-03-27 13:36:46 +07:00
Evgeny Medvedev
a9672ac9c1 Refactor Streamer 2019-03-26 22:09:05 +07:00
Evgeny Medvedev
ea6d0e87da Add streaming tests 2019-03-26 18:05:48 +07:00
Evgeny Medvedev
22e6795789 Remove unused file 2019-03-26 14:33:37 +07:00
Evgeny Medvedev
302fbc9947 Update dependencies versions 2019-03-26 14:30:46 +07:00
Evgeny Medvedev
3483d77aa4 Merge branch 'develop' into feature/streaming 2019-03-26 13:48:21 +07:00
Evgeny Medvedev
871af57840 Update README 2019-03-22 00:35:51 +07:00
Evgeny Medvedev
c76d25bf3f Update README 2019-03-12 21:34:35 +07:00
Evgeny Medvedev
2c3ece7010 Merge pull request #158 from blockchain-etl/develop
Updates to README, fix dependencies conflict, add timeout to export_traces, refactor file utils
2019-03-03 14:54:26 +07:00
Evgeny Medvedev
930efe5a0e Bump version 2019-03-03 14:48:23 +07:00
Evgeny Medvedev
aac00bf7d0 Add documentation to commands 2019-03-03 14:48:10 +07:00
Evgeny Medvedev
6f19ff0756 Merge pull request #157 from blockchain-etl/feature/remove-legacy-scripts
Remove legacy files
2019-03-03 14:38:20 +07:00
Evgeny Medvedev
f18f303fa9 Merge pull request #156 from tpmccallum/patch-3
Update setup.py
2019-03-03 14:37:25 +07:00
Evgeny Medvedev
b5e290e2c1 Remove legacy files 2019-03-03 14:34:34 +07:00
Timothy McCallum
a10fb2fac9 Update eth utils in setup.py
Updated from eth-utils>=1.2.0 to eth-utils==1.3.0

Also ran all of the installation and tests again and everything passed!
2019-03-03 17:34:23 +10:00
Evgeny Medvedev
83a7b5383f Merge pull request #155 from tpmccallum/patch-2
Changing python and pip -> python3 and pip3
2019-03-03 14:12:11 +07:00
Timothy McCallum
978513efc0 Update setup.py
I was getting this error
```
eth-keys 0.2.1 has requirement eth-utils<2.0.0,>=1.3.0, but you'll have eth-utils 1.2.0 which is incompatible.
```
It relates to the following issue
https://github.com/blockchain-etl/ethereum-etl/issues/141
which has the following fix
bde116ad06
I just tested it and also created this PR which you can now merge.
2019-03-03 13:48:24 +10:00
Timothy McCallum
65f5de1df1 Changing python and pip -> python3 and pip3 2019-03-03 13:41:49 +10:00
Evgeny Medvedev
df10702486 Add links to articles 2019-02-27 20:00:06 +07:00
Evgeny Medvedev
a288b51b73 Merge pull request #152 from blockchain-etl/feature/export_traces_timeout_option
Add timeout option to export_traces
2019-02-27 19:55:51 +07:00
Evgeny Medvedev
a6337d0817 Add timeout option to export_traces 2019-02-21 18:29:13 +07:00
Evgeny Medvedev
d63713ece1 Update Docker image tag for streaming 2019-02-18 17:45:59 +07:00
Evgeny Medvedev
ed2466d16d Update Docker image tag for streaming 2019-02-18 17:45:09 +07:00
Evgeny Medvedev
aab657da9b Add comments 2019-02-15 21:55:04 +07:00
Evgeny Medvedev
79b9a46bae Remove unused file 2019-02-15 18:03:28 +07:00
Evgeny Medvedev
cac7305f53 Refactor streaming 2019-02-15 17:10:06 +07:00
Evgeny Medvedev
80cd37bdde Remove requirements.txt 2019-02-15 16:15:42 +07:00
Evgeny Medvedev
ff4218c0b8 Merge branch 'develop' into feature/streaming 2019-02-15 16:15:15 +07:00
Evgeny Medvedev
f50cc7253b Merge branch 'master' into feature/streaming
# Conflicts:
#	.dockerignore
#	Dockerfile
#	requirements.txt
2019-02-15 16:11:22 +07:00
Evgeny Medvedev
4fc495342b Update LICENSE 2019-02-04 15:44:15 +07:00
Evgeny Medvedev
b0a5e02dd5 Update README 2019-02-04 15:25:52 +07:00
Evgeny Medvedev
f7af95d6c7 Update the version 2019-02-04 15:25:30 +07:00
Evgeny Medvedev
706eb8a9c9 Refactor misc utils 2019-01-30 16:04:23 +07:00
medvedev1088
e30e58f032 Merge pull request #144 from blockchain-etl/develop
Ethereum Classic Support, Python 3.5 support, Bug fixes
2019-01-19 01:00:53 +07:00
Evgeny Medvedev
3b866f4f32 Provide default value for chain argument for cli command callbacks 2019-01-17 17:48:20 +07:00
Evgeny Medvedev
d437f58eb9 Add exception logging to EthTokenService 2019-01-17 17:46:56 +07:00
medvedev1088
ecea237187 Merge pull request #115 from blockchain-etl/bug/value-error-export-tokens
Fix ValueError when exporting contracts
2019-01-17 13:52:35 +07:00
Evgeny Medvedev
aa1a0ee32a Update README 2018-12-13 16:29:59 +07:00
Evgeny Medvedev
4c3d67d442 Update README 2018-12-13 16:29:06 +07:00
Evgeny Medvedev
061f131919 Retry requests when node is not synced 2018-11-18 23:43:23 +07:00
Evgeny Medvedev
1e793f3d48 Add Ethereum Classic Support to table of contents 2018-11-18 23:05:45 +07:00
Evgeny Medvedev
3876957917 Update README 2018-11-18 23:03:37 +07:00
Evgeny Medvedev
76879e593d Fix typo 2018-11-18 23:01:34 +07:00
medvedev1088
f9b353d803 Merge pull request #131 from YazzyYaz/develop
Add chain command line argument for EVM Agnostic
2018-11-18 23:00:31 +07:00
Yaz Khoury
fb2c7fb149 feat: Default classic chain to https://ethereumclassic.network if not specified 2018-11-16 14:48:26 -05:00
Yaz Khoury
21808fb1c8 doc: Update README with classic info 2018-11-16 14:05:49 -05:00
Yaz Khoury
a4a15cb534 refactor: Move classic infura check function into utils.py 2018-11-16 14:01:50 -05:00
Yaz Khoury
04aa34dca4 feat: Add cli arg for chain network type 2018-11-16 12:33:24 -05:00
Evgeny Medvedev
5c98d95a5a Bump version and update README 2018-11-16 01:51:36 +07:00
medvedev1088
49faafa3e0 Merge pull request #127 from evgeniuz/develop
Python 3.5/3.7 compatibility and tox integration
2018-11-16 00:52:17 +07:00
medvedev1088
eb69307ddb Merge pull request #128 from blockchain-etl/develop
Traces
2018-11-14 13:10:12 +07:00
Evgeny Medvedev
c8202d9533 Change export_all command description 2018-11-14 13:05:18 +07:00
Evgeniy Filatov
01c1792ca5 replaced f-strings with str.format 2018-11-12 16:12:06 +02:00
Evgeniy Filatov
32e7f593be python 3.7 is not available on trusty 2018-11-12 15:53:12 +02:00
Evgeniy Filatov
538d841906 fixed typo 2018-11-12 15:49:37 +02:00
Evgeniy Filatov
3050f50893 updated travis to use tox 2018-11-12 15:47:32 +02:00
Evgeniy Filatov
49c6f042d7 added comment about minimum python version 2018-11-12 15:30:43 +02:00
Evgeniy Filatov
320f592e51 added tox tests, fixed some incompatibilities with python 3.5 2018-11-12 15:25:45 +02:00
medvedev1088
c0c8fd5845 Merge pull request #126 from blockchain-etl/feature/genesis-allocations
Feature/genesis allocations
2018-11-10 23:30:31 +07:00
Evgeny Medvedev
7b9276c5a2 Add licence header 2018-11-10 23:26:01 +07:00
Evgeny Medvedev
e5e15b262d Added daofork traces 2018-11-10 23:15:25 +07:00
Evgeny Medvedev
4092ce92b9 Add genesis traces support 2018-11-10 21:37:13 +07:00
Evgeny Medvedev
819f26e09e Print deprecation warning to stderr 2018-11-09 17:02:38 +07:00
medvedev1088
b500542437 Merge pull request #125 from blockchain-etl/cli-changes
Prepare for pip
2018-11-09 16:33:08 +07:00
Evgeny Medvedev
652193a2f2 Update Dockerfile 2018-11-09 16:23:10 +07:00
Evgeny Medvedev
7ecdfa4fb7 Fix ValueError when exporting contracts https://github.com/blockchain-etl/ethereum-etl/issues/113 2018-10-23 23:32:54 +07:00
Evgeny Medvedev
10e95f19d0 Update .dockerignore 2018-10-14 20:38:59 +07:00
Evgeny Medvedev
da68fe948b Upload last_synced_block.txt 2018-10-10 22:32:20 +07:00
Evgeny Medvedev
cc3ed86f3b Download last_synced_block_file.txt from GCS bucket 2018-10-10 21:26:25 +07:00
Evgeny Medvedev
60017a5abe Add initialization with start block 2018-10-10 20:22:52 +07:00
Evgeny Medvedev
8cc869694d Update kube.yml 2018-10-10 20:22:39 +07:00
Evgeny Medvedev
3fbf70fb4f Add type when joining 2018-09-28 13:12:04 +07:00
Evgeny Medvedev
f7e7e55441 Fix if condition 2018-09-28 00:05:21 +07:00
Evgeny Medvedev
d677d442bd Add enrichment to streaming.py 2018-09-27 23:49:36 +07:00
Evgeny Medvedev
7a47d93d9e Add docker configs 2018-09-27 16:54:08 +07:00
Evgeny Medvedev
e102f76631 Add pubsub_publish_test.py 2018-09-27 16:54:01 +07:00
Evgeny Medvedev
9bd9d4347b Improve logging 2018-09-13 00:15:59 +07:00
Evgeny Medvedev
54494aef6c Optimize publishing to PubSub 2018-09-13 00:13:17 +07:00
Evgeny Medvedev
c4c3ccc79a Add streaming with Google PubSub 2018-09-12 23:50:49 +07:00
240 changed files with 15379 additions and 1160 deletions

4
.dockerignore Normal file

@@ -0,0 +1,4 @@
.*
last_synced_block.txt
pid.txt
output

4
.github/FUNDING.yml vendored Normal file

@@ -0,0 +1,4 @@
# These are supported funding model platforms
custom: https://gitcoin.co/grants/233/ethereumetl

20
.github/workflows/publish-to-dockerhub.yml vendored Normal file

@@ -0,0 +1,20 @@
name: Publish DockerHub
on:
push:
tags:
- '*'
jobs:
build:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@master
- name: Publish to DockerHub
if: startsWith(github.event.ref, 'refs/tags/v')
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: blockchainetl/ethereum-etl
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
tag_semver: true

30
.github/workflows/publish-to-pypi.yml vendored Normal file

@@ -0,0 +1,30 @@
name: Publish to PyPI and TestPyPI
on:
push:
tags:
- '*'
jobs:
build-n-publish:
name: Build and publish to PyPI and TestPyPI
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@master
- name: Set up Python 3.7
uses: actions/setup-python@v1
with:
python-version: 3.7
- name: Build a binary wheel and a source tarball
run: python setup.py sdist
- name: Publish distribution to Test PyPI
if: startsWith(github.event.ref, 'refs/tags/v')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.test_pypi_password }}
repository_url: https://test.pypi.org/legacy/
- name: Publish distribution to PyPI
if: startsWith(github.event.ref, 'refs/tags/v')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.pypi_password }}

.travis.yml

@@ -1,7 +1,16 @@
 language: python
-python:
-  - "3.6"
+dist: xenial
+matrix:
+  include:
+    - python: "3.5"
+      env: TOX_POSARGS="-e py35"
+    - python: "3.6"
+      env: TOX_POSARGS="-e py36"
+    - python: "3.7"
+      env: TOX_POSARGS="-e py37"
+    - python: "3.8"
+      env: TOX_POSARGS="-e py38"
 install:
   - travis_retry pip install -r requirements.txt
+  - travis_retry pip install tox
 script:
-  - pytest -vv
+  - tox $TOX_POSARGS

Dockerfile

@@ -1,12 +1,15 @@
-FROM python:3.6-alpine
-MAINTAINER Eric Lim <elim0322@gmail.com>
+FROM python:3.6
+MAINTAINER Evgeny Medvedev <evge.medvedev@gmail.com>
 ENV PROJECT_DIR=ethereum-etl
 RUN mkdir /$PROJECT_DIR
 WORKDIR /$PROJECT_DIR
-COPY requirements.txt .
-RUN apk add --no-cache gcc musl-dev #for C libraries: <limits.h> <stdio.h>
-RUN pip install --upgrade pip && pip install -r /$PROJECT_DIR/requirements.txt
 COPY . .
-ENTRYPOINT ["python", "export_all.py"]
+RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/[streaming]
+# Add Tini
+ENV TINI_VERSION v0.18.0
+ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
+RUN chmod +x /tini
+ENTRYPOINT ["/tini", "--", "python", "ethereumetl"]

LICENSE

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
+Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

581
README.md

@@ -1,544 +1,127 @@
# Ethereum ETL
[![Join the chat at https://gitter.im/ethereum-eth](https://badges.gitter.im/ethereum-etl.svg)](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/blockchain-etl/ethereum-etl.png)](https://travis-ci.org/blockchain-etl/ethereum-etl)
[Join Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
[![Join the chat at https://gitter.im/ethereum-eth](https://badges.gitter.im/ethereum-etl.svg)](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Telegram](https://img.shields.io/badge/telegram-join%20chat-blue.svg)](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
[![Discord](https://img.shields.io/badge/discord-join%20chat-blue.svg)](https://discord.gg/wukrezR)
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
*Do you just want to query Ethereum data right away? Use the [public dataset in BigQuery](https://console.cloud.google.com/marketplace/details/ethereum/crypto-ethereum-blockchain).*
[Full documentation available here](http://ethereum-etl.readthedocs.io/).
## Quickstart
Install Ethereum ETL:
```bash
pip install ethereum-etl
pip3 install ethereum-etl
```
Export blocks and transactions ([Schema](#blockscsv), [Reference](#export_blocks_and_transactions)):
Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](docs/commands.md#export_blocks_and_transactions)):
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
--blocks-output blocks.csv --transactions-output transactions.csv \
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
```
Export ERC20 and ERC721 transfers ([Schema](#token_transferscsv), [Reference](#export_token_transfers)):
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md#export_token_transfers)):
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
```
Export receipts and logs ([Schema](#receiptscsv), [Reference](#export_receipts_and_logs)):
```bash
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
--provider-uri https://mainnet.infura.io --receipts-output receipts.csv --logs-output logs.csv
```
Export ERC20 and ERC721 token details ([Schema](#tokenscsv), [Reference](#export_tokens)):
```bash
> ethereumetl export_tokens --token-addresses token_addresses.csv \
--provider-uri https://mainnet.infura.io --output tokens.csv
```
Export traces ([Schema](#tracescsv), [Reference](#export_traces)):
Export traces ([Schema](docs/schema.md#tracescsv), [Reference](docs/commands.md#export_traces)):
```bash
> ethereumetl export_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
```
---
Stream blocks, transactions, logs, token_transfers continually to console ([Reference](docs/commands.md#stream)):
```bash
> pip3 install ethereum-etl[streaming]
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt \
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
```
Find other commands [here](https://ethereum-etl.readthedocs.io/en/latest/commands/).
For the latest version, check out the repo and call
```bash
> pip install -e .
> python ethereumetl.py
> pip3 install -e .
> python3 ethereumetl.py
```
[LIMITATIONS](#limitations)
## Useful Links
## Table of Contents
- [Schema](https://ethereum-etl.readthedocs.io/en/latest/schema/)
- [Command Reference](https://ethereum-etl.readthedocs.io/en/latest/commands/)
- [Documentation](https://ethereum-etl.readthedocs.io/)
- [Exporting the Blockchain](https://ethereum-etl.readthedocs.io/en/latest/exporting-the-blockchain/)
- [Querying in Amazon Athena](https://ethereum-etl.readthedocs.io/en/latest/amazon-athena/)
- [Querying in Google BigQuery](https://ethereum-etl.readthedocs.io/en/latest/google-bigquery/)
- [Querying in Kaggle](https://www.kaggle.com/bigquery/ethereum-blockchain)
- [Airflow DAGs](https://github.com/blockchain-etl/ethereum-etl-airflow)
- [Postgres ETL](https://github.com/blockchain-etl/ethereum-etl-postgresql)
- [Ethereum 2.0 ETL](https://github.com/blockchain-etl/ethereum2-etl)
- [Schema](#schema)
- [blocks.csv](#blockscsv)
- [transactions.csv](#transactionscsv)
- [token_transfers.csv](#token_transferscsv)
- [receipts.csv](#receiptscsv)
- [logs.csv](#logscsv)
- [contracts.csv](#contractscsv)
- [tokens.csv](#tokenscsv)
- [traces.csv](#tracescsv)
- [Exporting the Blockchain](#exporting-the-blockchain)
- [Export in 2 Hours](#export-in-2-hours)
- [Command Reference](#command-reference)
- [Querying in Amazon Athena](#querying-in-amazon-athena)
- [Querying in Google BigQuery](#querying-in-google-bigquery)
- [Public Dataset](#public-dataset)
## Schema
### blocks.csv
Column | Type |
------------------------|--------------------|
number | bigint |
hash | hex_string |
parent_hash | hex_string |
nonce | hex_string |
sha3_uncles | hex_string |
logs_bloom | hex_string |
transactions_root | hex_string |
state_root | hex_string |
receipts_root | hex_string |
miner | address |
difficulty | numeric |
total_difficulty | numeric |
size | bigint |
extra_data | hex_string |
gas_limit | bigint |
gas_used | bigint |
timestamp | bigint |
transaction_count | bigint |
### transactions.csv
Column | Type |
--------------------|-------------|
hash | hex_string |
nonce | bigint |
block_hash | hex_string |
block_number | bigint |
transaction_index| bigint |
from_address | address |
to_address | address |
value | numeric |
gas | bigint |
gas_price | bigint |
input | hex_string |
### token_transfers.csv
Column | Type |
--------------------|-------------|
token_address | address |
from_address | address |
to_address | address |
value | numeric |
transaction_hash | hex_string |
log_index | bigint |
block_number | bigint |
### receipts.csv
Column | Type |
-----------------------------|-------------|
transaction_hash | hex_string |
transaction_index | bigint |
block_hash | hex_string |
block_number | bigint |
cumulative_gas_used | bigint |
gas_used | bigint |
contract_address | address |
root | hex_string |
status | bigint |
### logs.csv
Column | Type |
-----------------------------|-------------|
log_index | bigint |
transaction_hash | hex_string |
transaction_index | bigint |
block_hash | hex_string |
block_number | bigint |
address | address |
data | hex_string |
topics | string |
### contracts.csv
Column | Type |
-----------------------------|-------------|
address | address |
bytecode | hex_string |
function_sighashes | string |
is_erc20 | boolean |
is_erc721 | boolean |
### tokens.csv
Column | Type |
-----------------------------|-------------|
address | address |
symbol | string |
name | string |
decimals | bigint |
total_supply | numeric |
### traces.csv
Column | Type |
-----------------------------|-------------|
block_number | bigint |
transaction_hash | hex_string |
transaction_index | bigint |
from_address | address |
to_address | address |
value | numeric |
input | hex_string |
output | hex_string |
trace_type | string |
call_type | string |
reward_type | string |
gas | bigint |
gas_used | bigint |
subtraces | bigint |
trace_address | string |
error | string |
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
Note: for the `address` type all hex characters are lower-cased.
`boolean` type can have 2 values: `True` or `False`.
## LIMITATIONS
- `contracts.csv` and `tokens.csv` files don't include contracts created by message calls (a.k.a. internal transactions).
We are working on adding support for those.
- In case the contract is a proxy that forwards all calls to a delegate, interface detection doesn't work,
which means `is_erc20` and `is_erc721` will always be false for proxy contracts.
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
contracts are missing this data. Also some contracts (e.g. EOS) implement these methods but with a wrong return type,
so the metadata columns are missing in this case as well.
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
because numeric types there can't handle 32-byte integers. You should use
`cast(value as FLOAT64)` (possible loss of precision) or
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
- The contracts that don't implement `decimals()` function but have the
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
will have `0` or `1` in the `decimals` column in the CSVs.
### Differences between geth and parity traces.csv
- `to_address` field differs for `callcode` traces (geth appears to return the correct value, while in parity the `to_address` of a `callcode` trace is the same as the `to_address` of the parent call);
- geth output doesn't have `reward` traces;
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
- `error` field contains human readable error message, which might differ in geth/parity output;
- geth output doesn't have `transaction_hash`;
- `gas_used` is 0 on traces with error in geth, empty in parity;
- zero output of subcalls is `0x000...` in geth, `0x` in parity;
## Exporting the Blockchain
1. Install python 3.6 https://www.python.org/downloads/ (3.5 and 3.7 are not supported by this tool for now)
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
For that use `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
export the data ~40 times faster, you will need to set up a local Ethereum node:
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
1. Start geth.
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
You can export blocks below `currentBlock`,
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
and token details; for those you need to wait until the full sync).
1. Install Ethereum ETL:
```bash
> pip install ethereum-etl
```
1. Export all:
```bash
> ethereumetl export_all --help
> ethereumetl export_all -s 0 -e 5999999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
```
The result will be in the `output` subdirectory, partitioned in Hive style:
```bash
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
...
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
...
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
...
```
Should work with geth and parity, on Linux, Mac, Windows.
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
does not place all of the block or receipt data into the database https://wiki.parity.io/Getting-Synced
Tested with Python 3.6, geth 1.8.7, Ubuntu 16.04.4
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
check this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
### Export in 2 Hours
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
Read this article for details https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2
### Running in Docker
1. Install Docker https://docs.docker.com/install/
1. Build a docker image
```bash
> docker build -t ethereum-etl:latest .
> docker image ls
```
1. Run a container out of the image
```bash
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 2018-01-01 -e 2018-01-01 -b 100000 -p https://mainnet.infura.io
```
### Command Reference
- [export_blocks_and_transactions](#export_blocks_and_transactions)
- [export_token_transfers](#export_token_transfers)
- [extract_token_transfers](#extract_token_transfers)
- [export_receipts_and_logs](#export_receipts_and_logs)
- [export_contracts](#export_contracts)
- [export_tokens](#export_tokens)
- [export_traces](#export_traces)
- [export_geth_traces](#export_geth_traces)
- [extract_geth_traces](#extract_geth_traces)
- [get_block_range_for_date](#get_block_range_for_date)
- [get_keccak_hash](#get_keccak_hash)
All the commands accept `-h` parameter for help, e.g.:
## Running Tests
```bash
> ethereumetl export_blocks_and_transactions -h
Usage: ethereumetl export_blocks_and_transactions [OPTIONS]
Export blocks and transactions.
Options:
-s, --start-block INTEGER Start block
-e, --end-block INTEGER End block [required]
-b, --batch-size INTEGER The number of blocks to export at a time.
-p, --provider-uri TEXT The URI of the web3 provider e.g.
file://$HOME/Library/Ethereum/geth.ipc or
https://mainnet.infura.io
-w, --max-workers INTEGER The maximum number of workers.
--blocks-output TEXT The output file for blocks. If not provided
blocks will not be exported. Use "-" for stdout
--transactions-output TEXT The output file for transactions. If not
provided transactions will not be exported. Use
"-" for stdout
-h, --help Show this message and exit.
```
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
#### export_blocks_and_transactions
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc \
--blocks-output blocks.csv --transactions-output transactions.csv
```
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
You can tune `--batch-size`, `--max-workers` for performance.
#### export_token_transfers
The API used in this command is not supported by Infura, so you will need a local node.
If you want to use Infura for exporting ERC20 transfers refer to [extract_token_transfers](#extract_token_transfers)
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
```
Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g.
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
--tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 --tokens 0x06012c8cf97bead5deae237070f9587f8e7a266d
```
You can tune `--batch-size`, `--max-workers` for performance.
#### export_receipts_and_logs
First extract transaction hashes from `transactions.csv`
(Exported with [export_blocks_and_transactions](#export_blocks_and_transactions)):
```bash
> ethereumetl extract_csv_column --input transactions.csv --column hash --output transaction_hashes.txt
```
Then export receipts and logs:
```bash
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
```
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
You can tune `--batch-size`, `--max-workers` for performance.
Upvote this feature request https://github.com/paritytech/parity/issues/9075,
it will make receipts and logs export much faster.
#### extract_token_transfers
First export receipt logs with [export_receipts_and_logs](#export_receipts_and_logs).
Then extract transfers from the logs.csv file:
```bash
> ethereumetl extract_token_transfers --logs logs.csv --output token_transfers.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
#### export_contracts
First extract contract addresses from `receipts.csv`
(Exported with [export_receipts_and_logs](#export_receipts_and_logs)):
```bash
> ethereumetl extract_csv_column --input receipts.csv --column contract_address --output contract_addresses.txt
```
Then export contracts:
```bash
> ethereumetl export_contracts --contract-addresses contract_addresses.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
#### export_tokens
First extract token addresses from `contracts.json`
(Exported with [export_contracts](#export_contracts)):
```bash
> ethereumetl filter_items -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
ethereumetl extract_field -f address -o token_addresses.txt
```
Then export ERC20 / ERC721 tokens:
```bash
> ethereumetl export_tokens --token-addresses token_addresses.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
```
You can tune `--max-workers` for performance.
#### export_traces
The API used in this command is not supported by Infura,
so you will need a local Parity archive node (`parity --tracing on`).
```bash
> ethereumetl export_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --batch-size 100 --output traces.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
#### export_geth_traces
The API used in this command is not supported by Infura,
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --ipcapi debug`).
When using rpc, add `--rpc --rpcapi debug` options.
```bash
> ethereumetl export_geth_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output geth_traces.json
```
You can tune `--batch-size`, `--max-workers` for performance.
#### extract_geth_traces
```bash
> ethereumetl extract_geth_traces --input geth_traces.json --output traces.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
#### get_block_range_for_date
```bash
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io --date 2018-01-01
4832686,4838611
```
#### get_keccak_hash
```bash
> ethereumetl get_keccak_hash -i "transfer(address,uint256)"
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
```
#### Running Tests
```bash
> pip install -e . -r requirements.txt
> pip3 install -e .[dev,streaming]
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
> pytest -vv
```
## Querying in Amazon Athena
- Upload the files to S3:
### Running Tox Tests
```bash
> cd output
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
> pip3 install tox
> tox
```
- Sign in to Athena https://console.aws.amazon.com/athena/home
## Running in Docker
1. Install Docker https://docs.docker.com/install/
2. Build a docker image
> docker build -t ethereum-etl:latest .
> docker image ls
3. Run a container out of the image
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 2018-01-01 -e 2018-01-01 -p https://mainnet.infura.io
4. Run streaming to console or Pub/Sub
> docker build -t ethereum-etl:latest -f Dockerfile .
> echo "Stream to console"
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
> echo "Stream to Pub/Sub"
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
## Projects using Ethereum ETL
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
* [Nansen by D5](https://nansen.d5.ai/?ref=ethereumetl) - Analytics platform for Ethereum
- Create a database:
```sql
CREATE DATABASE ethereumetl;
```
- Create the tables:
- blocks: [schemas/aws/blocks.sql](schemas/aws/blocks.sql)
- transactions: [schemas/aws/transactions.sql](schemas/aws/transactions.sql)
- token_transfers: [schemas/aws/token_transfers.sql](schemas/aws/token_transfers.sql)
- contracts: [schemas/aws/contracts.sql](schemas/aws/contracts.sql)
- receipts: [schemas/aws/receipts.sql](schemas/aws/receipts.sql)
- logs: [schemas/aws/logs.sql](schemas/aws/logs.sql)
- tokens: [schemas/aws/tokens.sql](schemas/aws/tokens.sql)
### Tables for Parquet Files
Read this article on how to convert CSVs to Parquet https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30
- Create the tables:
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](schemas/aws/parquet/parquet_blocks.sql)
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](schemas/aws/parquet/parquet_transactions.sql)
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](schemas/aws/parquet/parquet_token_transfers.sql)
Note that DECIMAL type is limited to 38 digits in Hive https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal
so values greater than 38 decimals will be null.
## Querying in Google BigQuery
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
### Public Dataset
You can query the data that's updated daily in the public BigQuery dataset
https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579
```bash
export_contracts \
  --contract-addresses=addresses.txt \
  --output=balances.csv \
  --provider-uri https://mainnet.infura.io/v3/12dcb369f6fe452b9c303009bdd60fbf \
  -w 10 \
  -b 1000
```

1046
addresses.txt Normal file

File diff suppressed because it is too large

@@ -20,11 +20,16 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from ethereumetl.cli.export_all import export_all
-
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
-
-export_all()
+import itertools
+
+
+# https://stackoverflow.com/a/27062830/1580227
+class AtomicCounter:
+    def __init__(self):
+        self._counter = itertools.count()
+        # init to 0
+        next(self._counter)
+
+    def increment(self, increment=1):
+        assert increment > 0
+        return [next(self._counter) for _ in range(0, increment)][-1]
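
The hunk above appears to pair the removal of a deprecated wrapper script with the new `AtomicCounter` class; the counter relies on `next()` over an `itertools.count` being atomic under CPython's GIL (per the Stack Overflow answer linked in the code). A minimal usage sketch, assuming the class is importable as `blockchainetl.atomic_counter.AtomicCounter` (the import path that appears in the composite exporter diff further down):

```python
from blockchainetl.atomic_counter import AtomicCounter

counter = AtomicCounter()
print(counter.increment())   # 1 -- advances by the default of 1 and returns the new value
print(counter.increment(5))  # 6 -- advances the counter 5 times and returns the last value
```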


@@ -21,10 +21,22 @@
 # SOFTWARE.
 
-from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
-
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
-
-export_receipts_and_logs()
+# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
+import sys
+import csv
+
+
+def set_max_field_size_limit():
+    max_int = sys.maxsize
+    decrement = True
+    while decrement:
+        # decrease the maxInt value by factor 10
+        # as long as the OverflowError occurs.
+        decrement = False
+        try:
+            csv.field_size_limit(max_int)
+        except OverflowError:
+            max_int = int(max_int / 10)
+            decrement = True
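
A sketch of how such a helper is typically called: run it once before reading CSVs whose fields exceed the `csv` module's default limit of 131072 bytes (e.g. contract bytecode, per the "Fix csv max field size in extract_contracts" commit above). The file name is illustrative, and the sketch assumes the helper above is in scope (its module path isn't shown in this compare view):

```python
import csv

set_max_field_size_limit()  # raise the per-field limit as high as the platform allows

with open('contracts.csv', newline='') as f:
    for row in csv.DictReader(f):
        pass  # rows with very large 'bytecode' fields now parse without OverflowError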

213
blockchainetl/exporters.py Normal file

@@ -0,0 +1,213 @@
# Copyright (c) Scrapy developers.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of Scrapy nor the names of its contributors may be used
# to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Item Exporters are used to export/serialize items into different formats.
"""
import csv
import io
import threading
from json import JSONEncoder
import decimal
import six
class BaseItemExporter(object):
def __init__(self, **kwargs):
self._configure(kwargs)
def _configure(self, options, dont_fail=False):
"""Configure the exporter by poping options from the ``options`` dict.
If dont_fail is set, it won't raise an exception on unexpected options
(useful for using with keyword arguments in subclasses constructors)
"""
self.encoding = options.pop('encoding', None)
self.fields_to_export = options.pop('fields_to_export', None)
self.export_empty_fields = options.pop('export_empty_fields', False)
self.indent = options.pop('indent', None)
if not dont_fail and options:
raise TypeError("Unexpected options: %s" % ', '.join(options.keys()))
def export_item(self, item):
raise NotImplementedError
def serialize_field(self, field, name, value):
serializer = field.get('serializer', lambda x: x)
return serializer(value)
def start_exporting(self):
pass
def finish_exporting(self):
pass
def _get_serialized_fields(self, item, default_value=None, include_empty=None):
"""Return the fields to export as an iterable of tuples
(name, serialized_value)
"""
if include_empty is None:
include_empty = self.export_empty_fields
if self.fields_to_export is None:
if include_empty and not isinstance(item, dict):
field_iter = six.iterkeys(item.fields)
else:
field_iter = six.iterkeys(item)
else:
if include_empty:
field_iter = self.fields_to_export
else:
field_iter = (x for x in self.fields_to_export if x in item)
for field_name in field_iter:
if field_name in item:
field = {} if isinstance(item, dict) else item.fields[field_name]
value = self.serialize_field(field, field_name, item[field_name])
else:
value = default_value
yield field_name, value
class CsvItemExporter(BaseItemExporter):
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
self._configure(kwargs, dont_fail=True)
if not self.encoding:
self.encoding = 'utf-8'
self.include_headers_line = include_headers_line
self.stream = io.TextIOWrapper(
file,
line_buffering=False,
write_through=True,
encoding=self.encoding
) if six.PY3 else file
self.csv_writer = csv.writer(self.stream, **kwargs)
self._headers_not_written = True
self._join_multivalued = join_multivalued
self._write_headers_lock = threading.Lock()
def serialize_field(self, field, name, value):
serializer = field.get('serializer', self._join_if_needed)
return serializer(value)
def _join_if_needed(self, value):
if isinstance(value, (list, tuple)):
try:
return self._join_multivalued.join(str(x) for x in value)
except TypeError: # list in value may not contain strings
pass
return value
def export_item(self, item):
# Double-checked locking (safe in Python because of GIL) https://en.wikipedia.org/wiki/Double-checked_locking
if self._headers_not_written:
with self._write_headers_lock:
if self._headers_not_written:
self._write_headers_and_set_fields_to_export(item)
self._headers_not_written = False
fields = self._get_serialized_fields(item, default_value='',
include_empty=True)
values = list(self._build_row(x for _, x in fields))
self.csv_writer.writerow(values)
def _build_row(self, values):
for s in values:
try:
yield to_native_str(s, self.encoding)
except TypeError:
yield s
def _write_headers_and_set_fields_to_export(self, item):
if self.include_headers_line:
if not self.fields_to_export:
if isinstance(item, dict):
# for dicts try using fields of the first item
self.fields_to_export = list(item.keys())
else:
# use fields declared in Item
self.fields_to_export = list(item.fields.keys())
row = list(self._build_row(self.fields_to_export))
self.csv_writer.writerow(row)
def EncodeDecimal(o):
if isinstance(o, decimal.Decimal):
return float(round(o, 8))
raise TypeError(repr(o) + " is not JSON serializable")
class JsonLinesItemExporter(BaseItemExporter):
def __init__(self, file, **kwargs):
self._configure(kwargs, dont_fail=True)
self.file = file
kwargs.setdefault('ensure_ascii', not self.encoding)
# kwargs.setdefault('default', EncodeDecimal)
self.encoder = JSONEncoder(default=EncodeDecimal, **kwargs)
def export_item(self, item):
itemdict = dict(self._get_serialized_fields(item))
data = self.encoder.encode(itemdict) + '\n'
self.file.write(to_bytes(data, self.encoding))
def to_native_str(text, encoding=None, errors='strict'):
""" Return str representation of `text`
(bytes in Python 2.x and unicode in Python 3.x). """
if six.PY2:
return to_bytes(text, encoding, errors)
else:
return to_unicode(text, encoding, errors)
def to_bytes(text, encoding=None, errors='strict'):
"""Return the binary representation of `text`. If `text`
is already a bytes object, return it as-is."""
if isinstance(text, bytes):
return text
if not isinstance(text, six.string_types):
raise TypeError('to_bytes must receive a unicode, str or bytes '
'object, got %s' % type(text).__name__)
if encoding is None:
encoding = 'utf-8'
return text.encode(encoding, errors)
def to_unicode(text, encoding=None, errors='strict'):
"""Return the unicode representation of a bytes object `text`. If `text`
is already an unicode object, return it as-is."""
if isinstance(text, six.text_type):
return text
if not isinstance(text, (bytes, six.text_type)):
raise TypeError('to_unicode must receive a bytes, str or unicode '
'object, got %s' % type(text).__name__)
if encoding is None:
encoding = 'utf-8'
return text.decode(encoding, errors)
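A minimal usage sketch of these exporters (both expect a binary file handle; `CsvItemExporter` wraps it in a `TextIOWrapper` on Python 3, and `JsonLinesItemExporter` writes encoded bytes; the hash values are placeholders):

```python
from blockchainetl.exporters import CsvItemExporter, JsonLinesItemExporter

items = [
    {'type': 'block', 'number': 1, 'hash': '0x88e9...'},
    {'type': 'block', 'number': 2, 'hash': '0xb495...'},
]

# CSV: the header row is taken from the keys of the first exported item.
with open('blocks.csv', 'wb') as csv_file:
    csv_exporter = CsvItemExporter(csv_file)
    for item in items:
        csv_exporter.export_item(item)

# JSON lines: one JSON object per line; Decimal values go through EncodeDecimal.
with open('blocks.json', 'wb') as json_file:
    json_exporter = JsonLinesItemExporter(json_file)
    for item in items:
        json_exporter.export_item(item)
```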


@@ -21,15 +21,15 @@
# SOFTWARE.
import logging
-from ethereumetl.atomic_counter import AtomicCounter
-from ethereumetl.exporters import CsvItemExporter, JsonLinesItemExporter
-from ethereumetl.file_utils import get_file_handle, close_silently
+from blockchainetl.atomic_counter import AtomicCounter
+from blockchainetl.exporters import CsvItemExporter, JsonLinesItemExporter
+from blockchainetl.file_utils import get_file_handle, close_silently
class CompositeItemExporter:
-    def __init__(self, filename_mapping, field_mapping):
+    def __init__(self, filename_mapping, field_mapping=None):
self.filename_mapping = filename_mapping
-        self.field_mapping = field_mapping
+        self.field_mapping = field_mapping or {}
self.file_mapping = {}
self.exporter_mapping = {}
@@ -40,7 +40,7 @@ class CompositeItemExporter:
def open(self):
for item_type, filename in self.filename_mapping.items():
file = get_file_handle(filename, binary=True)
-        fields = self.field_mapping[item_type]
+        fields = self.field_mapping.get(item_type)
self.file_mapping[item_type] = file
if str(filename).endswith('.json'):
item_exporter = JsonLinesItemExporter(file, fields_to_export=fields)
@@ -50,12 +50,16 @@ class CompositeItemExporter:
self.counter_mapping[item_type] = AtomicCounter()
def export_items(self, items):
for item in items:
self.export_item(item)
def export_item(self, item):
item_type = item.get('type')
if item_type is None:
-            raise ValueError('type key is not found in item {}'.format(repr(item)))
+            raise ValueError('"type" key is not found in item {}'.format(repr(item)))
-        exporter = self.exporter_mapping[item_type]
+        exporter = self.exporter_mapping.get(item_type)
if exporter is None:
raise ValueError('Exporter for item type {} not found'.format(item_type))
exporter.export_item(item)
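A sketch of the relaxed interface after this change: `field_mapping` is now optional, and a missing exporter raises a `ValueError` instead of a `KeyError` (the `close()` method lives in a part of the file not shown in this hunk; hash values are placeholders):

```python
exporter = CompositeItemExporter(
    filename_mapping={'block': 'blocks.csv', 'transaction': 'transactions.json'},
    field_mapping={'block': ['number', 'hash']},  # optional; unmapped types fall back to item keys
)
exporter.open()
exporter.export_item({'type': 'block', 'number': 1, 'hash': '0x88e9...'})
exporter.export_item({'type': 'transaction', 'hash': '0x5c50...', 'block_number': 1})
exporter.close()  # defined outside the hunk shown above
```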


@@ -20,11 +20,19 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
-from ethereumetl.cli.filter_items import filter_items
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
class ConsoleItemExporter:
def open(self):
pass
-filter_items()
def export_items(self, items):
for item in items:
self.export_item(item)
def export_item(self, item):
print(json.dumps(item))
def close(self):
pass


@@ -0,0 +1,42 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class CompositeItemConverter:
def __init__(self, converters=()):
self.converters = converters
def convert_item(self, item):
for converter in self.converters:
item = converter.convert_item(item)
return item
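A sketch of the chaining behavior: converters are applied in order, each one receiving the previous converter's output (both converters below are hypothetical, for illustration only):

```python
class LowercaseHashConverter:
    def convert_item(self, item):
        item = dict(item)
        item['hash'] = item['hash'].lower()
        return item

class DropTypeConverter:
    def convert_item(self, item):
        return {key: value for key, value in item.items() if key != 'type'}

composite = CompositeItemConverter(converters=(LowercaseHashConverter(), DropTypeConverter()))
print(composite.convert_item({'type': 'block', 'hash': '0xABCD'}))
# {'hash': '0xabcd'}
```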


@@ -0,0 +1,47 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from decimal import Decimal
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
# Large ints are not handled correctly by pg8000 so we use Decimal instead:
# https://github.com/mfenniak/pg8000/blob/412eace074514ada824e7a102765e37e2cda8eaa/pg8000/core.py#L1703
class IntToDecimalItemConverter(SimpleItemConverter):
def convert_field(self, key, value):
if isinstance(value, int):
return Decimal(value)
else:
return value
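A quick sketch of the effect: integer fields are wrapped in `Decimal`, everything else passes through (the hash value is a placeholder):

```python
converter = IntToDecimalItemConverter()
converted = converter.convert_item({'value': 10 ** 24, 'hash': '0x88e9...'})
# converted['value'] is Decimal(10 ** 24); non-int fields like 'hash' are unchanged
```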


@@ -0,0 +1,56 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class ListFieldItemConverter:
def __init__(self, field, new_field_prefix, fill=0, fill_with=None):
self.field = field
self.new_field_prefix = new_field_prefix
self.fill = fill
self.fill_with = fill_with
def convert_item(self, item):
if not item:
return item
lst = item.get(self.field)
result = item
if lst is not None and isinstance(lst, list):
result = item.copy()
del result[self.field]
for lst_item_index, lst_item in enumerate(lst):
result[self.new_field_prefix + str(lst_item_index)] = lst_item
if len(lst) < self.fill:
for i in range(len(lst), self.fill):
result[self.new_field_prefix + str(i)] = self.fill_with
return result
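A sketch of how this flattens a list field, such as log `topics`, into numbered scalar fields, which is convenient for CSV and SQL targets that can't store lists (topic values are placeholders):

```python
converter = ListFieldItemConverter(field='topics', new_field_prefix='topic', fill=4)
log = {'type': 'log', 'topics': ['0xddf2...', '0x0000...']}
print(converter.convert_item(log))
# {'type': 'log', 'topic0': '0xddf2...', 'topic1': '0x0000...', 'topic2': None, 'topic3': None}
```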


@@ -0,0 +1,45 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class SimpleItemConverter:
def __init__(self, converters=()):
self.converters = converters
def convert_item(self, item):
return {
key: self.convert_field(key, value) for key, value in item.items()
}
def convert_field(self, key, value):
return value


@@ -1,6 +1,6 @@
# MIT License
#
-# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
+# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,11 +20,22 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from datetime import datetime
-from ethereumetl.cli.export_geth_traces import export_geth_traces
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
-export_geth_traces()
class UnixTimestampItemConverter(SimpleItemConverter):
def convert_field(self, key, value):
if key is not None and key.endswith('timestamp'):
return to_timestamp(value)
else:
return value
def to_timestamp(value):
if isinstance(value, int):
return datetime.utcfromtimestamp(value).strftime('%Y-%m-%d %H:%M:%S')
else:
return value
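A sketch: any field whose key ends in `timestamp` is rewritten as a UTC datetime string, and other fields pass through (1438269988 is the timestamp of Ethereum block 1):

```python
converter = UnixTimestampItemConverter()
print(converter.convert_item({'number': 1, 'block_timestamp': 1438269988}))
# {'number': 1, 'block_timestamp': '2015-07-30 15:26:28'}
```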


@@ -0,0 +1,90 @@
# MIT License
#
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import logging
from collections import defaultdict
from google.cloud import storage
class GcsItemExporter:
def __init__(
self,
bucket):
self.bucket = bucket
self.storage_client = storage.Client()
def open(self):
pass
def export_items(self, items):
block_bundles = build_block_bundles(items)
for block_bundle in block_bundles:
block_number = block_bundle['block']['number']
destination_blob_name = f'blocks/{block_number}.json'
bucket = self.storage_client.bucket(self.bucket)
blob = bucket.blob(destination_blob_name)
blob.upload_from_string(json.dumps(block_bundle))
logging.info(f'Uploaded file gs://{self.bucket}/{destination_blob_name}')
def close(self):
pass
def build_block_bundles(items):
blocks = defaultdict(list)
transactions = defaultdict(list)
logs = defaultdict(list)
token_transfers = defaultdict(list)
traces = defaultdict(list)
for item in items:
item_type = item.get('type')
if item_type == 'block':
blocks[item.get('number')].append(item)
elif item_type == 'transaction':
transactions[item.get('block_number')].append(item)
elif item_type == 'log':
logs[item.get('block_number')].append(item)
elif item_type == 'token_transfer':
token_transfers[item.get('block_number')].append(item)
elif item_type == 'trace':
traces[item.get('block_number')].append(item)
else:
logging.info(f'Skipping item with type {item_type}')
block_bundles = []
for block_number in sorted(blocks.keys()):
if len(blocks[block_number]) != 1:
raise ValueError(f'There must be a single block for a given block number, was {len(blocks[block_number])} for block number {block_number}')
block_bundles.append({
'block': blocks[block_number][0],
'transactions': transactions[block_number],
'logs': logs[block_number],
'token_transfers': token_transfers[block_number],
'traces': traces[block_number],
})
return block_bundles
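A sketch of the grouping done by `build_block_bundles`: items are keyed by block number, and each bundle carries the block together with everything that belongs to it (hash values are placeholders):

```python
items = [
    {'type': 'block', 'number': 100},
    {'type': 'transaction', 'block_number': 100, 'hash': '0x5c50...'},
    {'type': 'log', 'block_number': 100, 'log_index': 0},
]
bundles = build_block_bundles(items)
# [{'block': {'type': 'block', 'number': 100},
#   'transactions': [{'type': 'transaction', ...}],
#   'logs': [{'type': 'log', ...}],
#   'token_transfers': [],
#   'traces': []}]
```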


@@ -0,0 +1,101 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import logging
from google.cloud import pubsub_v1
from timeout_decorator import timeout_decorator
class GooglePubSubItemExporter:
def __init__(
self,
item_type_to_topic_mapping,
message_attributes=('item_id', 'item_timestamp'),
enable_message_ordering=False):
self.item_type_to_topic_mapping = item_type_to_topic_mapping
self.enable_message_ordering = enable_message_ordering
self.publisher = create_publisher(enable_message_ordering)
self.message_attributes = message_attributes
def open(self):
pass
def export_items(self, items):
try:
self._export_items_with_timeout(items)
except timeout_decorator.TimeoutError as e:
# A bug in the Pub/Sub publisher makes it stall after running for some time.
# Exception in thread Thread-CommitBatchPublisher:
# details = "channel is in state TRANSIENT_FAILURE"
# https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606
logging.info('Recreating Pub/Sub publisher.')
self.publisher = create_publisher(self.enable_message_ordering)
raise e
@timeout_decorator.timeout(300)
def _export_items_with_timeout(self, items):
futures = []
for item in items:
message_future = self.export_item(item)
futures.append(message_future)
for future in futures:
# result() blocks until the message is published.
future.result()
def export_item(self, item):
item_type = item.get('type')
if item_type is not None and item_type in self.item_type_to_topic_mapping:
topic_path = self.item_type_to_topic_mapping.get(item_type)
data = json.dumps(item).encode('utf-8')
ordering_key = 'all' if self.enable_message_ordering else ''
message_future = self.publisher.publish(topic_path, data=data, ordering_key=ordering_key, **self.get_message_attributes(item))
return message_future
else:
logging.warning('Topic for item type "{}" is not configured.'.format(item_type))
def get_message_attributes(self, item):
attributes = {}
for attr_name in self.message_attributes:
if item.get(attr_name) is not None:
attributes[attr_name] = str(item.get(attr_name))
return attributes
def close(self):
pass
def create_publisher(enable_message_ordering):
batch_settings = pubsub_v1.types.BatchSettings(
max_bytes=1024 * 5, # 5 kilobytes
max_latency=1, # 1 second
max_messages=1000,
)
publisher_options = pubsub_v1.types.PublisherOptions(enable_message_ordering=enable_message_ordering)
return pubsub_v1.PublisherClient(batch_settings=batch_settings, publisher_options=publisher_options)
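A construction sketch for the class above (assumes `GOOGLE_APPLICATION_CREDENTIALS` is set and the topics exist; the `crypto_ethereum.<entity>` topic naming follows the stream docs, and the item values are placeholders):

```python
exporter = GooglePubSubItemExporter(
    item_type_to_topic_mapping={
        'block': 'projects/<your-project>/topics/crypto_ethereum.blocks',
        'transaction': 'projects/<your-project>/topics/crypto_ethereum.transactions',
    },
    enable_message_ordering=True,  # every message is published with the ordering key 'all'
)
exporter.open()
exporter.export_items([{'type': 'block', 'number': 1, 'item_id': 'block_0x88e9...'}])
```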


@@ -21,10 +21,24 @@
# SOFTWARE.
-from ethereumetl.cli.export_contracts import export_contracts
class InMemoryItemExporter:
def __init__(self, item_types):
self.item_types = item_types
self.items = {}
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
def open(self):
for item_type in self.item_types:
self.items[item_type] = []
-export_contracts()
def export_item(self, item):
item_type = item.get('type', None)
if item_type is None:
raise ValueError('type key is not found in item {}'.format(repr(item)))
self.items[item_type].append(item)
def close(self):
pass
def get_items(self, item_type):
return self.items[item_type]
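A usage sketch; this exporter is mostly useful in tests and for passing items between jobs in memory:

```python
exporter = InMemoryItemExporter(item_types=['block', 'transaction'])
exporter.open()
exporter.export_item({'type': 'block', 'number': 1})
print(exporter.get_items('block'))        # [{'type': 'block', 'number': 1}]
print(exporter.get_items('transaction'))  # []
```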


@@ -21,10 +21,22 @@
# SOFTWARE.
-from ethereumetl.cli.export_token_transfers import export_token_transfers
class MultiItemExporter:
def __init__(self, item_exporters):
self.item_exporters = item_exporters
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
def open(self):
for exporter in self.item_exporters:
exporter.open()
-export_token_transfers()
def export_items(self, items):
for exporter in self.item_exporters:
exporter.export_items(items)
def export_item(self, item):
for exporter in self.item_exporters:
exporter.export_item(item)
def close(self):
for exporter in self.item_exporters:
exporter.close()


@@ -0,0 +1,70 @@
# MIT License
#
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import collections
from sqlalchemy import create_engine
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
class PostgresItemExporter:
def __init__(self, connection_url, item_type_to_insert_stmt_mapping, converters=(), print_sql=True):
self.connection_url = connection_url
self.item_type_to_insert_stmt_mapping = item_type_to_insert_stmt_mapping
self.converter = CompositeItemConverter(converters)
self.print_sql = print_sql
self.engine = self.create_engine()
def open(self):
pass
def export_items(self, items):
items_grouped_by_type = group_by_item_type(items)
for item_type, insert_stmt in self.item_type_to_insert_stmt_mapping.items():
item_group = items_grouped_by_type.get(item_type)
if item_group:
connection = self.engine.connect()
converted_items = list(self.convert_items(item_group))
connection.execute(insert_stmt, converted_items)
def convert_items(self, items):
for item in items:
yield self.converter.convert_item(item)
def create_engine(self):
engine = create_engine(self.connection_url, echo=self.print_sql, pool_recycle=3600)
return engine
def close(self):
pass
def group_by_item_type(items):
result = collections.defaultdict(list)
for item in items:
result[item.get('type')].append(item)
return result
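A wiring sketch (module paths for the converters are inferred from the imports shown earlier; `blocks_insert_stmt` is a hypothetical statement object such as one produced by `create_insert_statement_for_table` below, and the connection URL follows the stream docs):

```python
from blockchainetl.jobs.exporters.converters.int_to_decimal_item_converter import IntToDecimalItemConverter
from blockchainetl.jobs.exporters.converters.unix_timestamp_item_converter import UnixTimestampItemConverter

exporter = PostgresItemExporter(
    connection_url='postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum',
    item_type_to_insert_stmt_mapping={'block': blocks_insert_stmt},  # hypothetical statement object
    converters=(UnixTimestampItemConverter(), IntToDecimalItemConverter()),
)
exporter.open()
exporter.export_items([{'type': 'block', 'number': 1, 'timestamp': 1438269988}])
```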


@@ -0,0 +1,9 @@
import logging
def logging_basic_config(filename=None):
format = '%(asctime)s - %(name)s [%(levelname)s] - %(message)s'
if filename is not None:
logging.basicConfig(level=logging.INFO, format=format, filename=filename)
else:
logging.basicConfig(level=logging.INFO, format=format)


@@ -21,10 +21,3 @@
# SOFTWARE.
from ethereumetl.cli.export_tokens import export_tokens
-print('========================================================================================')
-print('THIS SCRIPT IS DEPRECATED AND WILL BE REMOVED ON 2019-01-01. Use ethereumetl.py instead.')
-print('========================================================================================')
export_tokens()


@@ -0,0 +1,16 @@
from sqlalchemy.dialects.postgresql import insert
def create_insert_statement_for_table(table):
insert_stmt = insert(table)
primary_key_fields = [column.name for column in table.columns if column.primary_key]
if primary_key_fields:
insert_stmt = insert_stmt.on_conflict_do_update(
index_elements=primary_key_fields,
set_={
column.name: insert_stmt.excluded[column.name] for column in table.columns if not column.primary_key
}
)
return insert_stmt
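For example, with a made-up two-column table, the generated statement upserts on the primary key and updates all non-key columns from `excluded`:

```python
from sqlalchemy import BigInteger, Column, MetaData, String, Table

metadata = MetaData()
blocks = Table('blocks', metadata,
               Column('number', BigInteger, primary_key=True),
               Column('hash', String))

stmt = create_insert_statement_for_table(blocks)
# Roughly: INSERT INTO blocks (number, hash) VALUES (...)
#          ON CONFLICT (number) DO UPDATE SET hash = excluded.hash
```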


@@ -0,0 +1,139 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import logging
import os
import time
from blockchainetl.streaming.streamer_adapter_stub import StreamerAdapterStub
from blockchainetl.file_utils import smart_open
class Streamer:
def __init__(
self,
blockchain_streamer_adapter=StreamerAdapterStub(),
last_synced_block_file='last_synced_block.txt',
lag=0,
start_block=None,
end_block=None,
period_seconds=10,
block_batch_size=10,
retry_errors=True,
pid_file=None):
self.blockchain_streamer_adapter = blockchain_streamer_adapter
self.last_synced_block_file = last_synced_block_file
self.lag = lag
self.start_block = start_block
self.end_block = end_block
self.period_seconds = period_seconds
self.block_batch_size = block_batch_size
self.retry_errors = retry_errors
self.pid_file = pid_file
if self.start_block is not None or not os.path.isfile(self.last_synced_block_file):
init_last_synced_block_file((self.start_block or 0) - 1, self.last_synced_block_file)
self.last_synced_block = read_last_synced_block(self.last_synced_block_file)
def stream(self):
try:
if self.pid_file is not None:
logging.info('Creating pid file {}'.format(self.pid_file))
write_to_file(self.pid_file, str(os.getpid()))
self.blockchain_streamer_adapter.open()
self._do_stream()
finally:
self.blockchain_streamer_adapter.close()
if self.pid_file is not None:
logging.info('Deleting pid file {}'.format(self.pid_file))
delete_file(self.pid_file)
def _do_stream(self):
while self.end_block is None or self.last_synced_block < self.end_block:
synced_blocks = 0
try:
synced_blocks = self._sync_cycle()
except Exception as e:
# https://stackoverflow.com/a/4992124/1580227
logging.exception('An exception occurred while syncing block data.')
if not self.retry_errors:
raise e
if synced_blocks <= 0:
logging.info('Nothing to sync. Sleeping for {} seconds...'.format(self.period_seconds))
time.sleep(self.period_seconds)
def _sync_cycle(self):
current_block = self.blockchain_streamer_adapter.get_current_block_number()
target_block = self._calculate_target_block(current_block, self.last_synced_block)
blocks_to_sync = max(target_block - self.last_synced_block, 0)
logging.info('Current block {}, target block {}, last synced block {}, blocks to sync {}'.format(
current_block, target_block, self.last_synced_block, blocks_to_sync))
if blocks_to_sync != 0:
self.blockchain_streamer_adapter.export_all(self.last_synced_block + 1, target_block)
logging.info('Writing last synced block {}'.format(target_block))
write_last_synced_block(self.last_synced_block_file, target_block)
self.last_synced_block = target_block
return blocks_to_sync
def _calculate_target_block(self, current_block, last_synced_block):
target_block = current_block - self.lag
target_block = min(target_block, last_synced_block + self.block_batch_size)
target_block = min(target_block, self.end_block) if self.end_block is not None else target_block
return target_block
def delete_file(file):
try:
os.remove(file)
except OSError:
pass
def write_last_synced_block(file, last_synced_block):
write_to_file(file, str(last_synced_block) + '\n')
def init_last_synced_block_file(start_block, last_synced_block_file):
if os.path.isfile(last_synced_block_file):
raise ValueError(
'{} should not exist if --start-block option is specified. '
'Either remove the {} file or the --start-block option.'
.format(last_synced_block_file, last_synced_block_file))
write_last_synced_block(last_synced_block_file, start_block)
def read_last_synced_block(file):
with smart_open(file, 'r') as last_synced_block_file:
return int(last_synced_block_file.read())
def write_to_file(file, content):
with smart_open(file, 'w') as file_handle:
file_handle.write(content)
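An instantiation sketch (the stub adapter always reports block 0, so nothing would actually sync; a real adapter, e.g. the Ethereum streamer adapter from ethereumetl, is needed for real output):

```python
from blockchainetl.streaming.streamer_adapter_stub import StreamerAdapterStub

streamer = Streamer(
    blockchain_streamer_adapter=StreamerAdapterStub(),  # replace with a real adapter
    last_synced_block_file='last_synced_block.txt',     # must not exist when start_block is set
    start_block=500000,
    end_block=500009,
    block_batch_size=10,
    retry_errors=False,
)
streamer.stream()
```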


@@ -0,0 +1,13 @@
class StreamerAdapterStub:
def open(self):
pass
def get_current_block_number(self):
return 0
def export_all(self, start_block, end_block):
pass
def close(self):
pass


@@ -0,0 +1,19 @@
import logging
import signal
import sys
from blockchainetl.logging_utils import logging_basic_config
def configure_signals():
def sigterm_handler(_signo, _stack_frame):
# Raises SystemExit(0):
sys.exit(0)
signal.signal(signal.SIGTERM, sigterm_handler)
def configure_logging(filename):
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
logging_basic_config(filename=filename)

docs/amazon-athena.md Normal file

@@ -0,0 +1,42 @@
# Amazon Athena
## Querying in Amazon Athena
- Upload the files to S3:
```bash
> cd output
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
```
- Sign in to Athena https://console.aws.amazon.com/athena/home
- Create a database:
```sql
CREATE DATABASE ethereumetl;
```
- Create the tables:
- blocks: [schemas/aws/blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/blocks.sql)
- transactions: [schemas/aws/transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/transactions.sql)
- token_transfers: [schemas/aws/token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/token_transfers.sql)
- contracts: [schemas/aws/contracts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/contracts.sql)
- receipts: [schemas/aws/receipts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/receipts.sql)
- logs: [schemas/aws/logs.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/logs.sql)
- tokens: [schemas/aws/tokens.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/tokens.sql)
## Airflow DAGs
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
## Tables for Parquet Files
Read [this article](https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30) on how to convert CSVs to Parquet.
- Create the tables:
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_blocks.sql)
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_transactions.sql)
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_token_transfers.sql)
Note that [DECIMAL type is limited to 38 digits in Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal), so values longer than 38 digits will be null.

docs/citing.md Normal file

@@ -0,0 +1,10 @@
## How to Cite
```
@misc{ethereumetl,
author = {Evgeny Medvedev and the D5 team},
title = {Ethereum ETL},
year = {2018},
url = {https://github.com/blockchain-etl/ethereum-etl}
}
```

docs/commands.md Normal file

@@ -0,0 +1,242 @@
# Commands
All the commands accept the `-h` parameter for help, e.g.:
```bash
> ethereumetl export_blocks_and_transactions -h
Usage: ethereumetl export_blocks_and_transactions [OPTIONS]
Export blocks and transactions.
Options:
-s, --start-block INTEGER Start block
-e, --end-block INTEGER End block [required]
-b, --batch-size INTEGER The number of blocks to export at a time.
-p, --provider-uri TEXT The URI of the web3 provider e.g.
file://$HOME/Library/Ethereum/geth.ipc or
https://mainnet.infura.io
-w, --max-workers INTEGER The maximum number of workers.
--blocks-output TEXT The output file for blocks. If not provided
blocks will not be exported. Use "-" for stdout
--transactions-output TEXT The output file for transactions. If not
provided transactions will not be exported. Use
"-" for stdout
-h, --help Show this message and exit.
```
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
#### export_blocks_and_transactions
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc \
--blocks-output blocks.csv --transactions-output transactions.csv
```
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
You can tune `--batch-size`, `--max-workers` for performance.
[Blocks and transactions schema](schema.md#blockscsv).
#### export_token_transfers
The API used in this command is not supported by Infura, so you will need a local node.
If you want to use Infura for exporting ERC20 transfers, refer to [extract_token_transfers](#extract_token_transfers).
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
```
Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g.
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
--tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 --tokens 0x06012c8cf97bead5deae237070f9587f8e7a266d
```
You can tune `--batch-size`, `--max-workers` for performance.
[Token transfers schema](schema.md#token_transferscsv).
#### export_receipts_and_logs
First extract transaction hashes from `transactions.csv`
(Exported with [export_blocks_and_transactions](#export_blocks_and_transactions)):
```bash
> ethereumetl extract_csv_column --input transactions.csv --column hash --output transaction_hashes.txt
```
Then export receipts and logs:
```bash
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
```
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
You can tune `--batch-size`, `--max-workers` for performance.
Upvote this feature request https://github.com/paritytech/parity/issues/9075; it will make receipts and logs export much faster.
[Receipts and logs schema](schema.md#receiptscsv).
#### extract_token_transfers
First export receipt logs with [export_receipts_and_logs](#export_receipts_and_logs).
Then extract transfers from the logs.csv file:
```bash
> ethereumetl extract_token_transfers --logs logs.csv --output token_transfers.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
[Token transfers schema](schema.md#token_transferscsv).
#### export_contracts
First extract contract addresses from `receipts.csv`
(Exported with [export_receipts_and_logs](#export_receipts_and_logs)):
```bash
> ethereumetl extract_csv_column --input receipts.csv --column contract_address --output contract_addresses.txt
```
Then export contracts:
```bash
> ethereumetl export_contracts --contract-addresses contract_addresses.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
[Contracts schema](schema.md#contractscsv).
#### export_tokens
First extract token addresses from `contracts.json`
(Exported with [export_contracts](#export_contracts)):
```bash
> ethereumetl filter_items -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
ethereumetl extract_field -f address -o token_addresses.txt
```
Then export ERC20 / ERC721 tokens:
```bash
> ethereumetl export_tokens --token-addresses token_addresses.txt \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
```
You can tune `--max-workers` for performance.
[Tokens schema](schema.md#tokenscsv).
#### export_traces
Also called internal transactions.
The API used in this command is not supported by Infura,
so you will need a local Parity archive node (`parity --tracing on`).
Make sure your node has at least 8GB of memory, or else you will face timeout errors.
See [this issue](https://github.com/blockchain-etl/ethereum-etl/issues/137)
```bash
> ethereumetl export_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --batch-size 100 --output traces.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
[Traces schema](schema.md#tracescsv).
#### export_geth_traces
Read [Differences between geth and parity traces.csv](schema.md#differences-between-geth-and-parity-tracescsv)
The API used in this command is not supported by Infura,
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --ipcapi debug`).
When using rpc, add `--rpc --rpcapi debug` options.
```bash
> ethereumetl export_geth_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output geth_traces.json
```
You can tune `--batch-size`, `--max-workers` for performance.
#### extract_geth_traces
```bash
> ethereumetl extract_geth_traces --input geth_traces.json --output traces.csv
```
You can tune `--batch-size`, `--max-workers` for performance.
#### get_block_range_for_date
```bash
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --date 2018-01-01
4832686,4838611
```
#### get_keccak_hash
```bash
> ethereumetl get_keccak_hash -i "transfer(address,uint256)"
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
```
#### stream
```bash
> pip3 install ethereum-etl[streaming]
> ethereumetl stream --provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --start-block 500000
```
- This command outputs blocks, transactions, logs, token_transfers to the console by default.
- Entity types can be specified with the `-e` option,
e.g. `-e block,transaction,log,token_transfer,trace,contract,token`.
- Use the `--output` option to specify the Google Pub/Sub topic or Postgres database to publish blockchain data to:
- For Google PubSub: `--output=projects/<your-project>/topics/crypto_ethereum`.
Data will be pushed to `projects/<your-project>/topics/crypto_ethereum.blocks`, `projects/<your-project>/topics/crypto_ethereum.transactions` etc. topics.
- For Postgres: `--output=postgresql+pg8000://<user>:<password>@<host>:<port>/<database_name>`,
e.g. `--output=postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum`.
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).
- The command saves its state to the `last_synced_block.txt` file, where the last synced block number is saved periodically.
- Specify either the `--start-block` or the `--last-synced-block-file` option. `--last-synced-block-file` should point to the
file that stores the block number from which to start streaming the blockchain data.
- Use the `--lag` option to specify how many blocks to lag behind the head of the blockchain. It's the simplest way to
handle chain reorganizations: they are less likely the further a block is from the head.
- You can tune `--period-seconds`, `--batch-size`, `--block-batch-size`, `--max-workers` for performance.
- Refer to [blockchain-etl-streaming](https://github.com/blockchain-etl/blockchain-etl-streaming) for
instructions on deploying it to Kubernetes.
Stream blockchain data continually to Google Pub/Sub:
```bash
> export GOOGLE_APPLICATION_CREDENTIALS=/path_to_credentials_file.json
> ethereumetl stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
```
Stream blockchain data to a Postgres database:
```bash
ethereumetl stream --start-block 500000 --output postgresql+pg8000://<user>:<password>@<host>:5432/<database>
```
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).

docs/contact.md Normal file

@@ -0,0 +1,4 @@
# Contact
- [D5 Discord Server](https://discord.gg/wukrezR)
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)

docs/dockerhub.md Normal file

@@ -0,0 +1,11 @@
# Uploading to Docker Hub
```bash
ETHEREUMETL_VERSION=1.6.0-ordering2
docker build -t ethereum-etl:${ETHEREUMETL_VERSION} -f Dockerfile .
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
docker push blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:latest
docker push blockchainetl/ethereum-etl:latest
```

docs/ethereum-classic.md Normal file

@@ -0,0 +1,4 @@
# Ethereum Classic
To get ETC CSV files, make sure you pass the `--chain classic` param to the scripts that require it.
ETC export won't run if your `--provider-uri` is Infura: a warning is printed and the provider-uri is changed to `https://ethereumclassic.network` instead. For faster performance, run a local client for Classic instead, such as `parity --chain classic` or Geth Classic.
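For example (a sketch, assuming the export commands accept `--chain` as described above):
```bash
> ethereumetl export_blocks_and_transactions --chain classic --start-block 0 --end-block 500000 \
--provider-uri https://ethereumclassic.network --blocks-output blocks.csv --transactions-output transactions.csv
```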


@@ -0,0 +1,51 @@
## Exporting the Blockchain
If you'd like to have blockchain data set up and hosted for you, [get in touch with us at D5](https://d5.ai/?ref=ethereumetl).
1. Install Python 3.5.3+: https://www.python.org/downloads/
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support the eth_getFilterLogs JSON RPC method).
For that, use the `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
export the data ~40 times faster, you will need to set up a local Ethereum node:
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
1. Start geth.
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
You can export blocks below `currentBlock`;
there is no need to wait for the full sync, as the state is not needed (unless you also need contract bytecode
and token details, for which you do need to wait for the full sync).
1. Install Ethereum ETL: `> pip3 install ethereum-etl`
1. Export all:
```bash
> ethereumetl export_all --help
> ethereumetl export_all -s 0 -e 5999999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
```
In case the `ethereumetl` command is not available in PATH, use `python3 -m ethereumetl` instead.
The result will be in the `output` subdirectory, partitioned in Hive style:
```bash
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
...
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
...
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
...
```
Should work with geth and parity, on Linux, Mac, and Windows.
If you use Parity, you should disable warp mode with the `--no-warp` option, because warp mode
does not place all of the block or receipt data into the database: https://wiki.parity.io/Getting-Synced
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
check out this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
### Export in 2 Hours
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
Read [this article](https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2) for details.

docs/google-bigquery.md Normal file

@@ -0,0 +1,19 @@
# Google BigQuery
## Querying in BigQuery
If you'd rather not export the blockchain data yourself, we publish all tables as a public dataset in [BigQuery](https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579).
Data is updated near real-time (~4-minute delay to account for block finality).
### How to Query Balances for all Ethereum Addresses
Read [this article](https://medium.com/google-cloud/how-to-query-balances-for-all-ethereum-addresses-in-bigquery-fb594e4034a7).
### Building Token Recommender in Google Cloud Platform
Read [this article](https://medium.com/google-cloud/building-token-recommender-in-google-cloud-platform-1be5a54698eb).
### Awesome BigQuery Views
https://github.com/blockchain-etl/awesome-bigquery-views

docs/index.md Normal file

@@ -0,0 +1,24 @@
# Overview
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
With 700+ stars on GitHub, Ethereum ETL is the most popular open source project for Ethereum data.
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
## Features
Easily export:
* Blocks
* Transactions
* ERC20 / ERC721 tokens
* Token transfers
* Receipts
* Logs
* Contracts
* Internal transactions
## Projects using Ethereum ETL
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
* [Nansen](https://nansen.ai/?ref=ethereumetl) - Analytics platform for Ethereum

docs/limitations.md Normal file

@@ -0,0 +1,15 @@
# Limitations
- In case the contract is a proxy, which forwards all calls to a delegate, interface detection doesn't work,
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing from the `tokens`
table.
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
contracts are missing this data. Also, some contracts (e.g. EOS) implement these methods but with a wrong return type,
so the metadata columns are missing in this case as well.
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
because numeric types there can't handle 32-byte integers. You should use
`cast(value as FLOAT64)` (possible loss of precision) or
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers (see the query sketch after this list).
- Contracts that don't implement the `decimals()` function but have the
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
will have `0` or `1` in the `decimals` column in the CSVs.
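For example, a query sketch against the public `bigquery-public-data.crypto_ethereum` dataset:
```sql
SELECT token_address, safe_cast(value AS NUMERIC) AS value
FROM `bigquery-public-data.crypto_ethereum.token_transfers`
LIMIT 10
```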

docs/media.md Normal file

@@ -0,0 +1,11 @@
## Ethereum ETL in the Media
- [A Technical Breakdown Of Google's New Blockchain Search Tools](https://www.forbes.com/sites/michaeldelcastillo/2019/02/05/google-launches-search-for-bitcoin-ethereum-bitcoin-cash-dash-dogecoin-ethereum-classic-litecoin-and-zcash/#394fc868c789)
- [Navigating Bitcoin, Ethereum, XRP: How Google Is Quietly Making Blockchains Searchable](https://www.forbes.com/sites/michaeldelcastillo/2019/02/04/navigating-bitcoin-ethereum-xrp-how-google-is-quietly-making-blockchains-searchable/?ss=crypto-blockchain#49e111da4248)
- [Ethereum in BigQuery: a Public Dataset for smart contract analytics](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics)
- [Ethereum in BigQuery: how we built this dataset](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-how-we-built-dataset)
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
- [Ivan on Tech overviews crypto datasets in BigQuery](https://youtu.be/2IkJBNhsXNY?t=239)
- [Unlocking the Power of Google BigQuery (Cloud Next '19)](https://youtu.be/KL_i5XZIaJg?t=131)

docs/quickstart.md Normal file

@@ -0,0 +1,45 @@
# Quickstart
Install Ethereum ETL:
```bash
pip3 install ethereum-etl
```
Export blocks and transactions:
```bash
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --blocks-output blocks.csv --transactions-output transactions.csv
```
Export ERC20 and ERC721 transfers:
```bash
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
```
Export traces:
```bash
> ethereumetl export_traces --start-block 0 --end-block 500000 \
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
```
Stream blocks, transactions, logs, token_transfers continually to console:
```bash
> pip3 install ethereum-etl[streaming]
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt
```
Find all commands [here](commands.md).
---
To run the latest version of Ethereum ETL, check out the repo and call
```bash
> pip3 install -e .
> python3 ethereumetl.py
```

docs/schema.md Normal file

@@ -0,0 +1,153 @@
# Schema
## blocks.csv
Column | Type |
------------------|--------------------|
number | bigint |
hash | hex_string |
parent_hash | hex_string |
nonce | hex_string |
sha3_uncles | hex_string |
logs_bloom | hex_string |
transactions_root | hex_string |
state_root | hex_string |
receipts_root | hex_string |
miner | address |
difficulty | numeric |
total_difficulty | numeric |
size | bigint |
extra_data | hex_string |
gas_limit | bigint |
gas_used | bigint |
timestamp | bigint |
transaction_count | bigint |
---
## transactions.csv
Column | Type |
-----------------|-------------|
hash | hex_string |
nonce | bigint |
block_hash | hex_string |
block_number | bigint |
transaction_index| bigint |
from_address | address |
to_address | address |
value | numeric |
gas | bigint |
gas_price | bigint |
input | hex_string |
block_timestamp | bigint |
---
## token_transfers.csv
Column | Type |
--------------------|-------------|
token_address | address |
from_address | address |
to_address | address |
value | numeric |
transaction_hash | hex_string |
log_index | bigint |
block_number | bigint |
---
## receipts.csv
Column | Type |
-----------------------------|-------------|
transaction_hash | hex_string |
transaction_index | bigint |
block_hash | hex_string |
block_number | bigint |
cumulative_gas_used | bigint |
gas_used | bigint |
contract_address | address |
root | hex_string |
status | bigint |
---
## logs.csv
Column | Type |
-------------------------|-------------|
log_index | bigint |
transaction_hash | hex_string |
transaction_index | bigint |
block_hash | hex_string |
block_number | bigint |
address | address |
data | hex_string |
topics | string |
---
## contracts.csv
Column | Type |
-----------------------------|-------------|
address | address |
bytecode | hex_string |
function_sighashes | string |
is_erc20 | boolean |
is_erc721 | boolean |
block_number | bigint |
---
## tokens.csv
Column | Type |
-----------------------------|-------------|
address | address |
symbol | string |
name | string |
decimals | bigint |
total_supply | numeric |
---
## traces.csv
Column | Type |
-----------------------------|-------------|
block_number | bigint |
transaction_hash | hex_string |
transaction_index | bigint |
from_address | address |
to_address | address |
value | numeric |
input | hex_string |
output | hex_string |
trace_type | string |
call_type | string |
reward_type | string |
gas | bigint |
gas_used | bigint |
subtraces | bigint |
trace_address | string |
error | string |
status | bigint |
trace_id | string |
### Differences between geth and parity traces.csv
- The `to_address` field differs for the `callcode` trace (geth seems to return the correct value, while in parity the value of `to_address` is the same as the `to_address` of the parent call);
- geth output doesn't have `reward` traces;
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
- the `error` field contains a human-readable error message, which might differ between geth and parity output;
- geth output doesn't have `transaction_hash`;
- `gas_used` is 0 on traces with an error in geth, and empty in parity;
- zero output of subcalls is `0x000...` in geth and `0x` in parity.
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas).
Note: for the `address` type, all hex characters are lower-cased.
The `boolean` type can have 2 values: `True` or `False`.


@@ -25,21 +25,26 @@ from ethereumetl.cli.export_all import export_all
from ethereumetl.cli.export_blocks_and_transactions import export_blocks_and_transactions
from ethereumetl.cli.export_contracts import export_contracts
from ethereumetl.cli.export_geth_traces import export_geth_traces
from ethereumetl.cli.export_origin import export_origin
from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
from ethereumetl.cli.export_token_transfers import export_token_transfers
from ethereumetl.cli.export_tokens import export_tokens
from ethereumetl.cli.export_traces import export_traces
from ethereumetl.cli.extract_contracts import extract_contracts
from ethereumetl.cli.extract_csv_column import extract_csv_column
from ethereumetl.cli.extract_field import extract_field
from ethereumetl.cli.extract_geth_traces import extract_geth_traces
from ethereumetl.cli.extract_token_transfers import extract_token_transfers
from ethereumetl.cli.extract_tokens import extract_tokens
from ethereumetl.cli.filter_items import filter_items
from ethereumetl.cli.get_block_range_for_date import get_block_range_for_date
from ethereumetl.cli.get_block_range_for_timestamps import get_block_range_for_timestamps
from ethereumetl.cli.get_keccak_hash import get_keccak_hash
from ethereumetl.cli.stream import stream
@click.group()
@click.version_option(version='1.6.0')
@click.pass_context
def cli(ctx):
pass
@@ -48,6 +53,7 @@ def cli(ctx):
# export
cli.add_command(export_all, "export_all")
cli.add_command(export_blocks_and_transactions, "export_blocks_and_transactions")
cli.add_command(export_origin, "export_origin")
cli.add_command(export_receipts_and_logs, "export_receipts_and_logs")
cli.add_command(export_token_transfers, "export_token_transfers")
cli.add_command(extract_token_transfers, "extract_token_transfers")
@@ -56,6 +62,11 @@ cli.add_command(export_tokens, "export_tokens")
cli.add_command(export_traces, "export_traces")
cli.add_command(export_geth_traces, "export_geth_traces")
cli.add_command(extract_geth_traces, "extract_geth_traces")
cli.add_command(extract_contracts, "extract_contracts")
cli.add_command(extract_tokens, "extract_tokens")
# streaming
cli.add_command(stream, "stream")
# utils
cli.add_command(get_block_range_for_date, "get_block_range_for_date")

View File

@@ -25,11 +25,16 @@ import click
import re
from datetime import datetime, timedelta
from blockchainetl.logging_utils import logging_basic_config
from web3 import Web3
from ethereumetl.jobs.export_all_common import export_all_common
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.service.eth_service import EthService
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
def is_date_range(start, end):
@@ -74,7 +79,7 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
while start_date <= end_date:
batch_start_block, batch_end_block = eth_service.get_block_range_for_date(start_date)
partition_dir = f'/date={str(start_date)}/'
partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
yield batch_start_block, batch_end_block, partition_dir
start_date += day
@@ -89,7 +94,10 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
padded_batch_start_block = str(batch_start_block).zfill(8)
padded_batch_end_block = str(batch_end_block).zfill(8)
partition_dir = f'/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'
partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
padded_batch_start_block=padded_batch_start_block,
padded_batch_end_block=padded_batch_end_block,
)
yield batch_start_block, batch_end_block, partition_dir
else:
@@ -99,15 +107,18 @@ def get_partitions(start, end, partition_batch_size, provider_uri):
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start', required=True, type=str, help='Start block/ISO date/Unix time')
@click.option('-e', '--end', required=True, type=str, help='End block/ISO date/Unix time')
@click.option('-b', '--partition-batch-size', default=10000, type=int,
@click.option('-b', '--partition-batch-size', default=10000, show_default=True, type=int,
help='The number of blocks to export in partition.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output-dir', default='output', type=str, help='Output directory, partitioned in Hive style.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-B', '--export-batch-size', default=100, type=int, help='The number of requests in JSON RPC batches.')
def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size):
"""Exports all for a range of blocks."""
@click.option('-o', '--output-dir', default='output', show_default=True, type=str, help='Output directory, partitioned in Hive style.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-B', '--export-batch-size', default=100, show_default=True, type=int, help='The number of requests in JSON RPC batches.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size,
chain='ethereum'):
"""Exports all data for a range of blocks."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
export_all_common(get_partitions(start, end, partition_batch_size, provider_uri),
output_dir, provider_uri, max_workers, export_batch_size)
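For reference, a minimal sketch of the Hive-style partition naming produced by the `get_partitions` logic in the diff above (example values only):

```python
from datetime import date

# Date-partitioned layout, used when --start/--end are ISO dates.
def date_partition_dir(d):
    return '/date={!s}/'.format(d)

# Block-partitioned layout, used when --start/--end are block numbers;
# block numbers are zero-padded to 8 digits.
def block_partition_dir(start_block, end_block):
    return '/start_block={}/end_block={}'.format(
        str(start_block).zfill(8), str(end_block).zfill(8))

print(date_partition_dir(date(2018, 1, 1)))  # /date=2018-01-01/
print(block_partition_dir(0, 99999))         # /start_block=00000000/end_block=00099999
```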

View File

@@ -25,28 +25,32 @@ import click
from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
from ethereumetl.jobs.exporters.blocks_and_transactions_item_exporter import blocks_and_transactions_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to export at a time.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to export at a time.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('--blocks-output', default=None, type=str,
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('--blocks-output', default=None, show_default=True, type=str,
help='The output file for blocks. If not provided blocks will not be exported. Use "-" for stdout')
@click.option('--transactions-output', default=None, type=str,
@click.option('--transactions-output', default=None, show_default=True, type=str,
help='The output file for transactions. '
'If not provided transactions will not be exported. Use "-" for stdout')
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output, transactions_output):
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output,
transactions_output, chain='ethereum'):
"""Exports blocks and transactions."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
if blocks_output is None and transactions_output is None:
raise ValueError('Either --blocks-output or --transactions-output options must be provided')

View File

@@ -23,27 +23,30 @@
import click
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.export_contracts_job import ExportContractsJob
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-c', '--contract-addresses', required=True, type=str,
help='The file containing contract addresses, one per line.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri, chain='ethereum'):
"""Exports contracts bytecode and sighashes."""
check_classic_provider_uri(chain, provider_uri)
with smart_open(contract_addresses, 'r') as contract_addresses_file:
contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
if contract_address.strip())

View File

@@ -25,7 +25,7 @@ import click
from ethereumetl.jobs.export_geth_traces_job import ExportGethTracesJob
from ethereumetl.jobs.exporters.geth_traces_item_exporter import geth_traces_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
@@ -33,12 +33,12 @@ logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to process at a time.')
@click.option('-o', '--output', default='-', type=str,
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to process at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
help='The output file for geth traces. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')

View File

@@ -0,0 +1,56 @@
# A job to export data from Origin Protocol.
#
# Origin Protocol is an open source platform for implementing blockchain e-commerce.
# More details at https://www.originprotocol.com
#
# The core of the platform is the marketplace smart contract:
# - Address: https://etherscan.io/address/0x698ff47b84837d3971118a369c570172ee7e54c2
# - Code: https://github.com/OriginProtocol/origin/blob/master/packages/contracts/contracts/marketplace/V01_Marketplace.sol
#
# Transactional data is stored on-chain, while the associated metadata is stored on IPFS (https://ipfs.io).
#
# Given a range of block numbers, the job queries the blockchain for events emitted by the contract.
# Every event includes a hash pointing to a marketplace listing metadata stored as a JSON file on IPFS.
# A marketplace listing can either be a single self-contained listing, or the entry point for the entire
# catalog of products from a shop.
#
# The job generates two data sets:
# - Marketplace listings
# - Shop products
#
import click
from web3 import Web3
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.jobs.export_origin_job import ExportOriginJob
from ethereumetl.jobs.exporters.origin_exporter import origin_marketplace_listing_item_exporter, origin_shop_product_item_exporter
from ethereumetl.ipfs.origin import get_origin_ipfs_client
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('--marketplace-output', default='-', show_default=True, type=str, help='The output file for marketplace data. If not specified stdout is used.')
@click.option('--shop-output', default='-', show_default=True, type=str, help='The output file for shop data. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
def export_origin(start_block, end_block, batch_size, marketplace_output, shop_output, max_workers, provider_uri):
"""Exports Origin Protocol data."""
job = ExportOriginJob(
start_block=start_block,
end_block=end_block,
batch_size=batch_size,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
ipfs_client=get_origin_ipfs_client(),
marketplace_listing_exporter=origin_marketplace_listing_item_exporter(marketplace_output),
shop_product_exporter=origin_shop_product_item_exporter(shop_output),
max_workers=max_workers)
job.run()

View File

@@ -23,31 +23,35 @@
import click
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-b', '--batch-size', default=100, type=int, help='The number of receipts to export at a time.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of receipts to export at a time.')
@click.option('-t', '--transaction-hashes', required=True, type=str,
help='The file containing transaction hashes, one per line.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('--receipts-output', default=None, type=str,
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('--receipts-output', default=None, show_default=True, type=str,
help='The output file for receipts. If not provided receipts will not be exported. Use "-" for stdout')
@click.option('--logs-output', default=None, type=str,
@click.option('--logs-output', default=None, show_default=True, type=str,
help='The output file for receipt logs. '
'If not provided receipt logs will not be exported. Use "-" for stdout')
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output):
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
chain='ethereum'):
"""Exports receipts and logs."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
job = ExportReceiptsJob(
transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),

View File

@@ -27,7 +27,7 @@ from web3 import Web3
from ethereumetl.jobs.export_token_transfers_job import ExportTokenTransfersJob
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
@@ -35,14 +35,14 @@ logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
@click.option('-t', '--tokens', default=None, type=str, nargs=1, help='The list of token addresses to filter by.')
@click.option('-t', '--tokens', default=None, show_default=True, type=str, multiple=True, help='The list of token addresses to filter by.')
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
"""Exports ERC20/ERC721 transfers."""
job = ExportTokenTransfersJob(
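The switch from `nargs=1` to `multiple=True` above is what allows `--tokens` to accept more than one address. A minimal standalone sketch of click's `multiple=True` behavior (hypothetical script, not part of the repo):

```python
import click

@click.command()
@click.option('-t', '--tokens', multiple=True, type=str,
              help='May be passed several times; values arrive as a tuple.')
def show_tokens(tokens):
    # e.g. `-t 0xaaa -t 0xbbb` yields ('0xaaa', '0xbbb')
    click.echo(list(tokens))

if __name__ == '__main__':
    show_tokens()
```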

View File

@@ -25,25 +25,29 @@ import click
from web3 import Web3
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-t', '--token-addresses', type=str, help='The file containing token addresses, one per line.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-t', '--token-addresses', required=True, type=str,
help='The file containing token addresses, one per line.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
def export_tokens(token_addresses, output, max_workers, provider_uri):
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_tokens(token_addresses, output, max_workers, provider_uri, chain='ethereum'):
"""Exports ERC20/ERC721 tokens."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
with smart_open(token_addresses, 'r') as token_addresses_file:
job = ExportTokensJob(
token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),

View File

@@ -26,7 +26,7 @@ import click
from web3 import Web3
from ethereumetl.jobs.export_traces_job import ExportTracesJob
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
@@ -35,22 +35,32 @@ logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, type=int, help='Start block')
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-b', '--batch-size', default=5, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/.local/share/io.parity.ethereum/jsonrpc.ipc or http://localhost:8545/')
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
@click.option('--genesis-traces/--no-genesis-traces', default=False, show_default=True, help='Whether to include genesis traces')
@click.option('--daofork-traces/--no-daofork-traces', default=False, show_default=True, help='Whether to include daofork traces')
@click.option('-t', '--timeout', default=60, show_default=True, type=int, help='IPC or HTTP request timeout.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
genesis_traces, daofork_traces, timeout=60, chain='ethereum'):
"""Exports traces from parity node."""
if chain == 'classic' and daofork_traces:
raise ValueError(
'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.')
job = ExportTracesJob(
start_block=start_block,
end_block=end_block,
batch_size=batch_size,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri, timeout=timeout))),
item_exporter=traces_item_exporter(output),
max_workers=max_workers)
max_workers=max_workers,
include_genesis_traces=genesis_traces,
include_daofork_traces=daofork_traces)
job.run()

View File

@@ -0,0 +1,58 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import csv
import json
import click
from blockchainetl.csv_utils import set_max_field_size_limit
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
from ethereumetl.jobs.extract_contracts_job import ExtractContractsJob
from blockchainetl.logging_utils import logging_basic_config
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-t', '--traces', type=str, required=True, help='The CSV file containing traces.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_contracts(traces, batch_size, output, max_workers):
"""Extracts contracts from traces file."""
set_max_field_size_limit()
with smart_open(traces, 'r') as traces_file:
if traces.endswith('.json'):
traces_iterable = (json.loads(line) for line in traces_file)
else:
traces_iterable = csv.DictReader(traces_file)
job = ExtractContractsJob(
traces_iterable=traces_iterable,
batch_size=batch_size,
max_workers=max_workers,
item_exporter=contracts_item_exporter(output))
job.run()

View File

@@ -25,15 +25,15 @@ import click
import csv
from ethereumetl.csv_utils import set_max_field_size_limit
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-c', '--column', required=True, type=str, help='The csv column name to extract.')
def extract_csv_column(input, output, column):
"""Extracts column from given CSV file."""
"""Extracts column from given CSV file. Deprecated - use extract_field."""
set_max_field_size_limit()
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:

View File

@@ -21,21 +21,15 @@
# SOFTWARE.
import json
import click
from ethereumetl.file_utils import smart_open
from ethereumetl import misc_utils
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-f', '--field', required=True, type=str, help='The field name to extract.')
def extract_field(input, output, field):
"""Extracts field from given JSON lines file."""
# TODO: Add support for CSV
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
for line in input_file:
item = json.loads(line)
output_file.write(item[field] + '\n')
"""Extracts field from given CSV or JSON newline-delimited file."""
misc_utils.extract_field(input, output, field)

View File

@@ -24,19 +24,19 @@ import json
import click
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
from ethereumetl.jobs.extract_geth_traces_job import ExtractGethTracesJob
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', required=True, type=str, help='The JSON file containing geth traces.')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_geth_traces(input, batch_size, output, max_workers):
"""Extracts geth traces from JSON lines file."""
with smart_open(input, 'r') as geth_traces_file:

View File

@@ -25,19 +25,19 @@ import click
import csv
import json
from ethereumetl.file_utils import smart_open
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
from ethereumetl.jobs.extract_token_transfers_job import ExtractTokenTransfersJob
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.logging_utils import logging_basic_config
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-l', '--logs', type=str, required=True, help='The CSV file containing receipt logs.')
@click.option('-b', '--batch-size', default=100, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, type=int, help='The maximum number of workers.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_token_transfers(logs, batch_size, output, max_workers):
"""Extracts ERC20/ERC721 transfers from logs file."""
with smart_open(logs, 'r') as logs_file:

View File

@@ -0,0 +1,63 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import csv
import json
import click
from blockchainetl.csv_utils import set_max_field_size_limit
from blockchainetl.file_utils import smart_open
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
from ethereumetl.jobs.extract_tokens_job import ExtractTokensJob
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.thread_local_proxy import ThreadLocalProxy
from web3 import Web3
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-c', '--contracts', type=str, required=True, help='The JSON file containing contracts.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_tokens(contracts, provider_uri, output, max_workers):
"""Extracts tokens from contracts file."""
set_max_field_size_limit()
with smart_open(contracts, 'r') as contracts_file:
if contracts.endswith('.json'):
contracts_iterable = (json.loads(line) for line in contracts_file)
else:
contracts_iterable = csv.DictReader(contracts_file)
job = ExtractTokensJob(
contracts_iterable=contracts_iterable,
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
max_workers=max_workers,
item_exporter=tokens_item_exporter(output))
job.run()

View File

@@ -20,24 +20,18 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import click
from ethereumetl.file_utils import smart_open
from ethereumetl import misc_utils
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-p', '--predicate', required=True, type=str,
help='Predicate in Python code e.g. "item[\'is_erc20\']".')
def filter_items(input, output, predicate):
"""Filters given JSON lines file by predicate."""
# TODO: Add support for CSV
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
for line in input_file:
item = json.loads(line)
if eval(predicate, globals(), {'item': item}):
output_file.write(json.dumps(item) + '\n')
"""Filters rows in given CSV or JSON newline-delimited file."""
def evaluated_predicate(item):
return eval(predicate, globals(), {'item': item})
misc_utils.filter_items(input, output, evaluated_predicate)

View File

@@ -21,32 +21,36 @@
# SOFTWARE.
from datetime import datetime
import click
from datetime import datetime
from web3 import Web3
from ethereumetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from blockchainetl.file_utils import smart_open
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.service.eth_service import EthService
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-d', '--date', required=True, type=lambda d: datetime.strptime(d, '%Y-%m-%d'),
help='The date e.g. 2018-01-01.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
def get_block_range_for_date(provider_uri, date, output):
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
"""Outputs start and end blocks for given date."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
provider = get_provider_from_uri(provider_uri)
web3 = Web3(provider)
eth_service = EthService(web3)
start_block, end_block = eth_service.get_block_range_for_date(date)
with click.open_file(output, 'w') as output_file:
with smart_open(output, 'w') as output_file:
output_file.write('{},{}\n'.format(start_block, end_block))

View File

@@ -25,23 +25,26 @@ import click
from web3 import Web3
from ethereumetl.file_utils import smart_open
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.file_utils import smart_open
from blockchainetl.logging_utils import logging_basic_config
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.service.eth_service import EthService
from ethereumetl.utils import check_classic_provider_uri
logging_basic_config()
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', type=str,
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-s', '--start-timestamp', required=True, type=int, help='Start unix timestamp, in seconds.')
@click.option('-e', '--end-timestamp', required=True, type=int, help='End unix timestamp, in seconds.')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp, output):
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp, output, chain='ethereum'):
"""Outputs start and end blocks for given timestamps."""
provider_uri = check_classic_provider_uri(chain, provider_uri)
provider = get_provider_from_uri(provider_uri)
web3 = Web3(provider)
eth_service = EthService(web3)

View File

@@ -25,14 +25,14 @@ import click
from eth_utils import keccak
from ethereumetl.file_utils import smart_open
from ethereumetl.logging_utils import logging_basic_config
from blockchainetl.file_utils import smart_open
from blockchainetl.logging_utils import logging_basic_config
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input-string', default='Transfer(address,address,uint256)', type=str,
@click.option('-i', '--input-string', default='Transfer(address,address,uint256)', show_default=True, type=str,
help='String to hash, e.g. Transfer(address,address,uint256)')
@click.option('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
def get_keccak_hash(input_string, output):
"""Outputs 32-byte Keccak hash of given string."""
hash = keccak(text=input_string)
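For illustration, the same hash can be computed directly with `eth_utils`; the value below is the well-known ERC-20 `Transfer` event topic:

```python
from eth_utils import keccak

print(keccak(text='Transfer(address,address,uint256)').hex())
# ddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef
```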

ethereumetl/cli/stream.py (new file, 111 lines)
View File

@@ -0,0 +1,111 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import logging
import random
import click
from blockchainetl.streaming.streaming_utils import configure_signals, configure_logging
from ethereumetl.enumeration.entity_type import EntityType
from ethereumetl.providers.auto import get_provider_from_uri
from ethereumetl.streaming.item_exporter_creator import create_item_exporters
from ethereumetl.thread_local_proxy import ThreadLocalProxy
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-l', '--last-synced-block-file', default='last_synced_block.txt', show_default=True, type=str, help='')
@click.option('--lag', default=0, show_default=True, type=int, help='The number of blocks to lag behind the network.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
help='The URI of the web3 provider e.g. '
'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output', type=str,
help='Either Google PubSub topic path e.g. projects/your-project/topics/crypto_ethereum; '
'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum. '
'If not specified will print to console')
@click.option('-s', '--start-block', default=None, show_default=True, type=int, help='Start block')
@click.option('-e', '--entity-types', default=','.join(EntityType.ALL_FOR_INFURA), show_default=True, type=str,
help='The list of entity types to export.')
@click.option('--period-seconds', default=10, show_default=True, type=int, help='How many seconds to sleep between syncs')
@click.option('-b', '--batch-size', default=10, show_default=True, type=int, help='How many blocks to batch in single request')
@click.option('-B', '--block-batch-size', default=1, show_default=True, type=int, help='How many blocks to batch in single sync round')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The number of workers')
@click.option('--log-file', default=None, show_default=True, type=str, help='Log file')
@click.option('--pid-file', default=None, show_default=True, type=str, help='pid file')
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
"""Streams all data types to console or Google Pub/Sub."""
configure_logging(log_file)
configure_signals()
entity_types = parse_entity_types(entity_types)
# validate_entity_types(entity_types, output)
from ethereumetl.streaming.item_exporter_creator import create_item_exporter
from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
from blockchainetl.streaming.streamer import Streamer
# TODO: Implement fallback mechanism for provider uris instead of picking randomly
provider_uri = pick_random_provider_uri(provider_uri)
logging.info('Using ' + provider_uri)
streamer_adapter = EthStreamerAdapter(
batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
item_exporter=create_item_exporters(output),
batch_size=batch_size,
max_workers=max_workers,
entity_types=entity_types
)
streamer = Streamer(
blockchain_streamer_adapter=streamer_adapter,
last_synced_block_file=last_synced_block_file,
lag=lag,
start_block=start_block,
period_seconds=period_seconds,
block_batch_size=block_batch_size,
pid_file=pid_file
)
streamer.stream()
def parse_entity_types(entity_types):
entity_types = [c.strip() for c in entity_types.split(',')]
# validate passed types
for entity_type in entity_types:
if entity_type not in EntityType.ALL_FOR_STREAMING:
raise click.BadOptionUsage(
'--entity-type', '{} is not an available entity type. Supply a comma-separated list of types from {}'
.format(entity_type, ','.join(EntityType.ALL_FOR_STREAMING)))
return entity_types
def validate_entity_types(entity_types, output):
from ethereumetl.streaming.item_exporter_creator import determine_item_exporter_type, ItemExporterType
item_exporter_type = determine_item_exporter_type(output)
if item_exporter_type == ItemExporterType.POSTGRES \
and (EntityType.CONTRACT in entity_types or EntityType.TOKEN in entity_types):
raise ValueError('contract and token are not yet supported entity types for postgres item exporter.')
def pick_random_provider_uri(provider_uri):
provider_uris = [uri.strip() for uri in provider_uri.split(',')]
return random.choice(provider_uris)
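A hypothetical usage sketch for the two helpers above: entity types are parsed from a comma-separated string, and when several provider URIs are supplied one is picked at random (per the TODO, no fallback mechanism yet):

```python
print(parse_entity_types('block, transaction ,log'))
# ['block', 'transaction', 'log']

# The URIs below are placeholders; any comma-separated list works.
print(pick_random_provider_uri('https://node1.example.com,https://node2.example.com'))
```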

View File

@@ -28,3 +28,4 @@ class EthContract(object):
self.function_sighashes = []
self.is_erc20 = False
self.is_erc721 = False
self.block_number = None

View File

@@ -0,0 +1,32 @@
class OriginMarketplaceListing(object):
def __init__(self):
self.listing_id = None
self.ipfs_hash = None
self.listing_type = None
self.category = None
self.subcategory = None
self.language = None
self.title = None
self.description = None
self.price = None
self.currency = None
self.block_number = None
self.log_index = None
class OriginShopProduct(object):
def __init__(self):
self.listing_id = None
self.product_id = None
self.ipfs_path = None
self.external_id = None
self.parent_external_id = None
self.title = None
self.description = None
self.price = None
self.currency = None
self.image = None
self.option1 = None
self.option2 = None
self.option3 = None
self.block_number = None
self.log_index = None

View File

@@ -28,3 +28,4 @@ class EthToken(object):
self.name = None
self.decimals = None
self.total_supply = None
self.block_number = None

View File

@@ -36,6 +36,9 @@ class EthTrace(object):
self.reward_type = None
self.gas = None
self.gas_used = None
self.subtraces = None
self.subtraces = 0
self.trace_address = None
self.error = None
self.status = None
self.trace_id = None
self.trace_index = None

View File

@@ -0,0 +1,12 @@
class EntityType:
BLOCK = 'block'
TRANSACTION = 'transaction'
RECEIPT = 'receipt'
LOG = 'log'
TOKEN_TRANSFER = 'token_transfer'
TRACE = 'trace'
CONTRACT = 'contract'
TOKEN = 'token'
ALL_FOR_STREAMING = [BLOCK, TRANSACTION, LOG, TOKEN_TRANSFER, TRACE, CONTRACT, TOKEN]
ALL_FOR_INFURA = [BLOCK, TRANSACTION, LOG, TOKEN_TRANSFER]
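For reference, the stream command's default `-e` value above is built from this class:

```python
# Assuming the EntityType class defined above is importable.
print(','.join(EntityType.ALL_FOR_INFURA))
# block,transaction,log,token_transfer
```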

View File

@@ -20,47 +20,93 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import logging
import time
from requests.exceptions import Timeout as RequestsTimeout, HTTPError, TooManyRedirects
from web3.utils.threads import Timeout as Web3Timeout
from ethereumetl.executors.bounded_executor import BoundedExecutor
from ethereumetl.executors.fail_safe_executor import FailSafeExecutor
from ethereumetl.misc.retriable_value_error import RetriableValueError
from ethereumetl.progress_logger import ProgressLogger
from ethereumetl.utils import dynamic_batch_iterator
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError)
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError,
RetriableValueError)
BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS = 2 * 60
# Executes the given work in batches, reducing the batch size exponentially in case of errors.
class BatchWorkExecutor:
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS, max_retries=5):
self.batch_size = starting_batch_size
self.max_batch_size = starting_batch_size
self.latest_batch_size_change_time = None
self.max_workers = max_workers
# Using bounded executor prevents unlimited queue growth
# and allows monitoring in-progress futures and failing fast in case of errors.
self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
self.retry_exceptions = retry_exceptions
self.max_retries = max_retries
self.progress_logger = ProgressLogger()
self.logger = logging.getLogger('BatchWorkExecutor')
def execute(self, work_iterable, work_handler, total_items=None):
self.progress_logger.start(total_items=total_items)
for batch in dynamic_batch_iterator(work_iterable, lambda: self.batch_size):
self.executor.submit(self._fail_safe_execute, work_handler, batch)
# Check race conditions
def _fail_safe_execute(self, work_handler, batch):
try:
work_handler(batch)
self._try_increase_batch_size(len(batch))
except self.retry_exceptions:
batch_size = self.batch_size
# Reduce the batch size. Subsequent batches will be 2 times smaller
if batch_size == len(batch) and batch_size > 1:
self.batch_size = int(batch_size / 2)
# For the failed batch try handling items one by one
self.logger.exception('An exception occurred while executing work_handler.')
self._try_decrease_batch_size(len(batch))
self.logger.info('The batch of size {} will be retried one item at a time.'.format(len(batch)))
for item in batch:
work_handler([item])
execute_with_retries(work_handler, [item],
max_retries=self.max_retries, retry_exceptions=self.retry_exceptions)
self.progress_logger.track(len(batch))
# Some acceptable race conditions are possible
def _try_decrease_batch_size(self, current_batch_size):
batch_size = self.batch_size
if batch_size == current_batch_size and batch_size > 1:
new_batch_size = int(current_batch_size / 2)
self.logger.info('Reducing batch size to {}.'.format(new_batch_size))
self.batch_size = new_batch_size
self.latest_batch_size_change_time = time.time()
def _try_increase_batch_size(self, current_batch_size):
if current_batch_size * 2 <= self.max_batch_size:
current_time = time.time()
latest_batch_size_change_time = self.latest_batch_size_change_time
seconds_since_last_change = current_time - latest_batch_size_change_time \
if latest_batch_size_change_time is not None else 0
if seconds_since_last_change > BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS:
new_batch_size = current_batch_size * 2
self.logger.info('Increasing batch size to {}.'.format(new_batch_size))
self.batch_size = new_batch_size
self.latest_batch_size_change_time = current_time
def shutdown(self):
self.executor.shutdown()
self.progress_logger.finish()
def execute_with_retries(func, *args, max_retries=5, retry_exceptions=RETRY_EXCEPTIONS, sleep_seconds=1):
for i in range(max_retries):
try:
return func(*args)
except retry_exceptions:
logging.exception('An exception occurred while executing execute_with_retries. Retry #{}'.format(i))
if i < max_retries - 1:
logging.info('The request will be retried after {} seconds. Retry #{}'.format(sleep_seconds, i))
time.sleep(sleep_seconds)
continue
else:
raise
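A hypothetical usage sketch for `execute_with_retries` above, wrapping a flaky callable that raises one of the `RETRY_EXCEPTIONS`:

```python
import random

def flaky_fetch(block_number):
    # Simulates a transient failure; ConnectionError is in RETRY_EXCEPTIONS.
    if random.random() < 0.5:
        raise ConnectionError('transient failure')
    return {'number': block_number}

# Positional args pass through *args; max_retries and sleep_seconds are keyword-only.
print(execute_with_retries(flaky_fetch, 42, max_retries=3, sleep_seconds=1))
```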

View File

@@ -0,0 +1,31 @@
import logging
import requests
logger = logging.getLogger('ipfs')
IPFS_TIMEOUT = 5 # Timeout in seconds
IPFS_NUM_ATTEMPTS = 3
# A simple client to fetch content from IPFS gateways.
class IpfsClient:
def __init__(self, gatewayUrls):
self._gatewayUrls = gatewayUrls
def _get(self, path, json):
for i in range(IPFS_NUM_ATTEMPTS):
# Round-robin through the gateways.
gatewayUrl = self._gatewayUrls[i % len(self._gatewayUrls)]
try:
url = "{}/{}".format(gatewayUrl, path)
r = requests.get(url, timeout=IPFS_TIMEOUT)
r.raise_for_status()
return r.json() if json else r.text
except Exception as e:
logger.error("Attempt #{} - Failed downloading {}: {}".format(i + 1, path, e))
raise Exception("IPFS download failure for hash {}".format(path))
def get(self, path):
return self._get(path, False)
def get_json(self, path):
return self._get(path, True)
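A hypothetical usage sketch for the client above (the gateway URL and content hash are placeholders):

```python
client = IpfsClient(['https://gateway.ipfs.io/ipfs'])
# Rotates through the configured gateways, retrying up to IPFS_NUM_ATTEMPTS
# times; raises if every attempt fails.
data = client.get_json('<content-hash>')
```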

ethereumetl/ipfs/origin.py (new file, 139 lines)
View File

@@ -0,0 +1,139 @@
import logging
import re
from ethereumetl.domain.origin import OriginMarketplaceListing, OriginShopProduct
from ethereumetl.ipfs.client import IpfsClient
logger = logging.getLogger('origin')
IPFS_PRIMARY_GATEWAY_URL = 'https://ipfs-prod.ogn.app/ipfs'
IPFS_SECONDARY_GATEWAY_URL = 'https://gateway.ipfs.io/ipfs'
# Returns an IPFS client that can be used to fetch Origin Protocol's data.
def get_origin_ipfs_client():
return IpfsClient([IPFS_PRIMARY_GATEWAY_URL, IPFS_SECONDARY_GATEWAY_URL])
# Parses the shop's HTML index page to extract the name of the IPFS directory under
# which all the shop's data is located.
def _get_shop_data_dir(shop_index_page):
match = re.search('<link rel="data-dir" href="(.+?)"', shop_index_page)
return match.group(1) if match else None
# Returns the list of products from an Origin Protocol shop.
def _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash):
results = []
shop_index_page = ipfs_client.get(shop_ipfs_hash + "/index.html")
shop_data_dir = _get_shop_data_dir(shop_index_page)
path = "{}/{}".format(shop_ipfs_hash, shop_data_dir) if shop_data_dir else shop_ipfs_hash
logger.debug("Using shop path {}".format(path))
products_path = "{}/{}".format(path, 'products.json')
try:
products = ipfs_client.get_json(products_path)
except Exception as e:
logger.error("Listing {} Failed downloading product {}: {}".format(listing_id, products_path, e))
return results
logger.info("Found {} products in for listing {}".format(len(products), listing_id))
# Go through all the products from the shop.
for product in products:
product_id = product.get('id')
if not product_id:
logger.error('Product entry with missing id in products.json')
continue
logger.info("Processing product {}".format(product_id))
# Fetch the product details to get the variants.
product_base_path = "{}/{}".format(path, product_id)
product_data_path = "{}/{}".format(product_base_path, 'data.json')
try:
product = ipfs_client.get_json(product_data_path)
except Exception as e:
logger.error("Failed downloading {}: {}".format(product_data_path, e))
continue
# Extract the top product.
result = OriginShopProduct()
result.block_number = receipt_log.block_number
result.log_index = receipt_log.log_index
result.listing_id = listing_id
result.product_id = "{}-{}".format(listing_id, product_id)
result.ipfs_path = product_base_path
result.external_id = str(product.get('externalId')) if product.get('externalId') else None
result.parent_external_id = None
result.title = product.get('title')
result.description = product.get('description')
result.price = product.get('price')
result.currency = product.get('currency', 'fiat-USD')
result.option1 = None
result.option2 = None
result.option3 = None
result.image = product.get('image')
results.append(result)
# Extract the variants, if any.
variants = product.get('variants', [])
if len(variants) > 0:
logger.info("Found {} variants".format(len(variants)))
for variant in variants:
result = OriginShopProduct()
result.block_number = receipt_log.block_number
result.log_index = receipt_log.log_index
result.listing_id = listing_id
result.product_id = "{}-{}".format(listing_id, variant.get('id'))
result.ipfs_path = product_base_path
result.external_id = str(variant.get('externalId')) if variant.get('externalId') else None
result.parent_external_id = str(product.get('externalId')) if product.get('externalId') else None
result.title = variant.get('title')
result.description = product.get('description')
result.price = variant.get('price')
result.currency = product.get('currency', 'fiat-USD')
result.option1 = variant.get('option1')
result.option2 = variant.get('option2')
result.option3 = variant.get('option3')
result.image = variant.get('image')
results.append(result)
return results
# Returns a listing from the Origin Protocol marketplace.
def get_origin_marketplace_data(receipt_log, listing_id, ipfs_client, ipfs_hash):
# Load the listing's metadata from IPFS.
try:
listing_data = ipfs_client.get_json(ipfs_hash)
except Exception as e:
logger.error("Extraction failed. Listing {} Listing hash {} - {}".format(listing_id, ipfs_hash, e))
return None, []
# Fill-in an OriginMarketplaceListing object based on the IPFS data.
listing = OriginMarketplaceListing()
listing.block_number = receipt_log.block_number
listing.log_index = receipt_log.log_index
listing.listing_id = str(listing_id)
listing.ipfs_hash = ipfs_hash
listing.listing_type = listing_data.get('listingType', '')
listing.category = listing_data.get('category', '')
listing.subcategory = listing_data.get('subCategory', '')
listing.language = listing_data.get('language', '')
listing.title = listing_data.get('title', '')
listing.description = listing_data.get('description', '')
listing.price = listing_data.get('price', {}).get('amount', '')
listing.currency = listing_data.get('price', {}).get('currency', '')
# If it is a shop listing, also extract all of the shop data.
shop_listings = []
shop_ipfs_hash = listing_data.get('shopIpfsHash')
if shop_ipfs_hash:
try:
shop_listings = _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash)
except Exception as e:
logger.error("Extraction failed. Listing {} Shop hash {} - {}".format(listing_id, shop_ipfs_hash, e))
return listing, shop_listings
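
To illustrate _get_shop_data_dir above: shop index pages embed the data directory in a <link rel="data-dir"> tag, which the regex captures. A small sketch with made-up HTML:

sample_index_page = '<html><head><link rel="data-dir" href="QmDataDirHash"></head></html>'
assert _get_shop_data_dir(sample_index_page) == 'QmDataDirHash'
# Pages without the tag yield None, and the shop root hash is used instead.
assert _get_shop_data_dir('<html></html>') is None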

ethereumetl/jobs/export_all_common.py

@@ -25,13 +25,10 @@ import csv
 import logging
 import os
 import shutil
 from time import time
-from web3 import Web3
 from ethereumetl.csv_utils import set_max_field_size_limit
-from ethereumetl.file_utils import smart_open
+from blockchainetl.file_utils import smart_open
 from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
 from ethereumetl.jobs.export_contracts_job import ExportContractsJob
 from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
@@ -42,11 +39,10 @@ from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_ex
 from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
 from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
 from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
 from ethereumetl.logging_utils import logging_basic_config
 from ethereumetl.providers.auto import get_provider_from_uri
 from ethereumetl.thread_local_proxy import ThreadLocalProxy
+from web3 import Web3

 logging_basic_config()
 logger = logging.getLogger('export_all')
@@ -76,21 +72,45 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         padded_batch_start_block = str(batch_start_block).zfill(8)
         padded_batch_end_block = str(batch_end_block).zfill(8)
-        block_range = f'{padded_batch_start_block}-{padded_batch_end_block}'
-        file_name_suffix = f'{padded_batch_start_block}_{padded_batch_end_block}'
+        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
+            padded_batch_start_block=padded_batch_start_block,
+            padded_batch_end_block=padded_batch_end_block,
+        )
+        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
+            padded_batch_start_block=padded_batch_start_block,
+            padded_batch_end_block=padded_batch_end_block,
+        )

         # # # blocks_and_transactions # # #

-        blocks_output_dir = f'{output_dir}/blocks{partition_dir}'
+        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)
-        transactions_output_dir = f'{output_dir}/transactions{partition_dir}'
+        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

-        blocks_file = f'{blocks_output_dir}/blocks_{file_name_suffix}.csv'
-        transactions_file = f'{transactions_output_dir}/transactions_{file_name_suffix}.csv'
-        logger.info(f'Exporting blocks {block_range} to {blocks_file}')
-        logger.info(f'Exporting transactions from blocks {block_range} to {transactions_file}')
+        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
+            blocks_output_dir=blocks_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
+            transactions_output_dir=transactions_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
+            block_range=block_range,
+            blocks_file=blocks_file,
+        ))
+        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
+            block_range=block_range,
+            transactions_file=transactions_file,
+        ))

         job = ExportBlocksJob(
             start_block=batch_start_block,
@@ -107,11 +127,20 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         token_transfers_file = None
         if is_log_filter_supported(provider_uri):
-            token_transfers_output_dir = f'{output_dir}/token_transfers{partition_dir}'
+            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
+                output_dir=output_dir,
+                partition_dir=partition_dir,
+            )
             os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)

-            token_transfers_file = f'{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'
-            logger.info(f'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}')
+            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
+                token_transfers_output_dir=token_transfers_output_dir,
+                file_name_suffix=file_name_suffix,
+            )
+            logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
+                block_range=block_range,
+                token_transfers_file=token_transfers_file,
+            ))

             job = ExportTokenTransfersJob(
                 start_block=batch_start_block,
@@ -124,22 +153,46 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         # # # receipts_and_logs # # #

-        cache_output_dir = f'{output_dir}/.tmp{partition_dir}'
+        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

-        transaction_hashes_file = f'{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'
-        logger.info(f'Extracting hash column from transaction file {transactions_file}')
+        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
+            cache_output_dir=cache_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logger.info('Extracting hash column from transaction file {transactions_file}'.format(
+            transactions_file=transactions_file,
+        ))
         extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')

-        receipts_output_dir = f'{output_dir}/receipts{partition_dir}'
+        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)
-        logs_output_dir = f'{output_dir}/logs{partition_dir}'
+        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

-        receipts_file = f'{receipts_output_dir}/receipts_{file_name_suffix}.csv'
-        logs_file = f'{logs_output_dir}/logs_{file_name_suffix}.csv'
-        logger.info(f'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}')
+        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
+            receipts_output_dir=receipts_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
+            logs_output_dir=logs_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
+            block_range=block_range,
+            receipts_file=receipts_file,
+            logs_file=logs_file,
+        ))

         with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
             job = ExportReceiptsJob(
@@ -154,15 +207,29 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         # # # contracts # # #

-        contract_addresses_file = f'{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'
-        logger.info(f'Extracting contract_address from receipt file {receipts_file}')
+        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
+            cache_output_dir=cache_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
+            receipts_file=receipts_file,
+        ))
         extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')

-        contracts_output_dir = f'{output_dir}/contracts{partition_dir}'
+        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
+            output_dir=output_dir,
+            partition_dir=partition_dir,
+        )
         os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

-        contracts_file = f'{contracts_output_dir}/contracts_{file_name_suffix}.csv'
-        logger.info(f'Exporting contracts from blocks {block_range} to {contracts_file}')
+        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
+            contracts_output_dir=contracts_output_dir,
+            file_name_suffix=file_name_suffix,
+        )
+        logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
+            block_range=block_range,
+            contracts_file=contracts_file,
+        ))

         with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
             contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
@@ -178,15 +245,29 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         # # # tokens # # #

         if token_transfers_file is not None:
-            token_addresses_file = f'{cache_output_dir}/token_addresses_{file_name_suffix}'
-            logger.info(f'Extracting token_address from token_transfers file {token_transfers_file}')
+            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
+                cache_output_dir=cache_output_dir,
+                file_name_suffix=file_name_suffix,
+            )
+            logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
+                token_transfers_file=token_transfers_file,
+            ))
            extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')

-            tokens_output_dir = f'{output_dir}/tokens{partition_dir}'
+            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
+                output_dir=output_dir,
+                partition_dir=partition_dir,
+            )
             os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

-            tokens_file = f'{tokens_output_dir}/tokens_{file_name_suffix}.csv'
-            logger.info(f'Exporting tokens from blocks {block_range} to {tokens_file}')
+            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
+                tokens_output_dir=tokens_output_dir,
+                file_name_suffix=file_name_suffix,
+            )
+            logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
+                block_range=block_range,
+                tokens_file=tokens_file,
+            ))

             with smart_open(token_addresses_file, 'r') as token_addresses:
                 job = ExportTokensJob(
@@ -200,4 +281,7 @@ def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_s
         shutil.rmtree(os.path.dirname(cache_output_dir))

         end_time = time()
         time_diff = round(end_time - start_time, 5)
-        logger.info(f'Exporting blocks {block_range} took {time_diff} seconds')
+        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
+            block_range=block_range,
+            time_diff=time_diff,
+        ))
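
The pattern across this file is mechanical: every f-string is rewritten as an equivalent str.format() call, presumably for compatibility with older interpreters, since f-strings require Python 3.6+ while str.format() also runs on 3.5. A two-line illustration of the equivalence:

block_range = '00000000-00099999'
# Both forms produce the identical string (the f-string needs Python 3.6+).
assert 'blocks {block_range}'.format(block_range=block_range) == f'blocks {block_range}'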

ethereumetl/jobs/export_blocks_job.py

@@ -24,7 +24,7 @@
 import json

 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.json_rpc_requests import generate_get_block_by_number_json_rpc
 from ethereumetl.mappers.block_mapper import EthBlockMapper
 from ethereumetl.mappers.transaction_mapper import EthTransactionMapper
@@ -72,7 +72,7 @@ class ExportBlocksJob(BaseJob):
     def _export_batch(self, block_number_batch):
         blocks_rpc = list(generate_get_block_by_number_json_rpc(block_number_batch, self.export_transactions))
-        response = self.batch_web3_provider.make_request(json.dumps(blocks_rpc))
+        response = self.batch_web3_provider.make_batch_request(json.dumps(blocks_rpc))
         results = rpc_response_batch_to_results(response)
         blocks = [self.block_mapper.json_dict_to_block(result) for result in results]
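
For context on the make_batch_request rename: the payload passed in is a JSON-RPC batch, a list of request objects sent in a single HTTP round trip. A sketch of what such a batch looks like for two blocks (the shape follows the JSON-RPC 2.0 spec; the id scheme here is illustrative, the real ids are assigned by the request generator):

import json

blocks_rpc = [
    {'jsonrpc': '2.0', 'method': 'eth_getBlockByNumber', 'params': [hex(n), True], 'id': n}
    for n in (1000000, 1000001)
]
# One HTTP request carries both block queries.
print(json.dumps(blocks_rpc))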

ethereumetl/jobs/export_contracts_job.py

@@ -23,9 +23,10 @@
 import json

+from ethereumetl.domain.contract import EthContract
 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.json_rpc_requests import generate_get_code_json_rpc
 from ethereumetl.mappers.contract_mapper import EthContractMapper
 from ethereumetl.service.eth_contract_service import EthContractService
@@ -57,8 +58,8 @@ class ExportContractsJob(BaseJob):
         self.batch_work_executor.execute(self.contract_addresses_iterable, self._export_contracts)

     def _export_contracts(self, contract_addresses):
         contracts_code_rpc = list(generate_get_code_json_rpc(contract_addresses))
-        response_batch = self.batch_web3_provider.make_request(json.dumps(contracts_code_rpc))
+        response_batch = self.batch_web3_provider.make_batch_request(json.dumps(contracts_code_rpc))

         contracts = []
         for response in response_batch:
@@ -67,7 +68,9 @@ class ExportContractsJob(BaseJob):
             result = rpc_response_to_result(response)

             contract_address = contract_addresses[request_id]
-            contract = self._get_contract(contract_address, result)
+            contract = EthContract()
+            contract.address = contract_address
+            contract.bytecode = result
             contracts.append(contract)

         for contract in contracts:
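
The bytecode returned by eth_getCode is what EthContractService later scans for function sighashes, the first 4 bytes of the keccak-256 hash of a function signature. A quick illustration using web3.py:

from web3 import Web3

# 'a9059cbb' is the well-known selector of the ERC20 transfer function.
selector = Web3.keccak(text='transfer(address,uint256)').hex()[2:10]
assert selector == 'a9059cbb'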

ethereumetl/jobs/export_geth_traces_job.py

@@ -24,7 +24,7 @@ import json
 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
 from ethereumetl.json_rpc_requests import generate_trace_block_by_number_json_rpc
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.mappers.geth_trace_mapper import EthGethTraceMapper
 from ethereumetl.utils import validate_range, rpc_response_to_result
@@ -62,7 +62,7 @@ class ExportGethTracesJob(BaseJob):
     def _export_batch(self, block_number_batch):
         trace_block_rpc = list(generate_trace_block_by_number_json_rpc(block_number_batch))
-        response = self.batch_web3_provider.make_request(json.dumps(trace_block_rpc))
+        response = self.batch_web3_provider.make_batch_request(json.dumps(trace_block_rpc))

         for response_item in response:
             block_number = response_item.get('id')

ethereumetl/jobs/export_origin_job.py

@@ -0,0 +1,120 @@
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
from blockchainetl.jobs.base_job import BaseJob
from ethereumetl.utils import validate_range
from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
from ethereumetl.mappers.origin_mapper import OriginMarketplaceListingMapper, OriginShopProductMapper
from ethereumetl.service.origin_extractor import OriginEventExtractor

# Addresses of the marketplace contracts.
ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS = '0x819Bb9964B6eBF52361F1ae42CF4831B921510f9'
ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS = '0x698Ff47B84837d3971118a369c570172EE7e54c2'

# Block numbers at which the contracts were deployed to the Mainnet.
ORIGIN_MARKETPLACE_V0_BLOCK_NUMBER_EPOCH = 6436157
ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH = 8582597


class ExportOriginJob(BaseJob):
    def __init__(
            self,
            start_block,
            end_block,
            batch_size,
            web3,
            ipfs_client,
            marketplace_listing_exporter,
            shop_product_exporter,
            max_workers):
        validate_range(start_block, end_block)
        self.start_block = start_block
        self.end_block = end_block
        self.web3 = web3
        self.marketplace_listing_exporter = marketplace_listing_exporter
        self.shop_product_exporter = shop_product_exporter
        self.batch_work_executor = BatchWorkExecutor(batch_size, max_workers)
        self.event_extractor = OriginEventExtractor(ipfs_client)

        self.receipt_log_mapper = EthReceiptLogMapper()
        self.marketplace_listing_mapper = OriginMarketplaceListingMapper()
        self.shop_listing_mapper = OriginShopProductMapper()

    def _start(self):
        self.marketplace_listing_exporter.open()
        self.shop_product_exporter.open()

    def _export(self):
        self.batch_work_executor.execute(
            range(self.start_block, self.end_block + 1),
            self._export_batch,
            total_items=self.end_block - self.start_block + 1
        )

    def _export_batch(self, block_number_batch):
        assert len(block_number_batch) > 0
        from_block = block_number_batch[0]
        to_block = block_number_batch[-1]

        # Nothing to process if the block range is older than the V0 marketplace contract's epoch.
        if to_block < ORIGIN_MARKETPLACE_V0_BLOCK_NUMBER_EPOCH:
            return

        # Determine the version and address of the marketplace contract to query based on the block range.
        batches = []
        if to_block < ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH or from_block >= ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH:
            # The block range falls within a single version of the marketplace contract.
            version = '000' if to_block < ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH else '001'
            address = ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS if version == '000' else ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS
            batches.append({
                'contract_address': address,
                'contract_version': version,
                'from_block': from_block,
                'to_block': to_block
            })
        else:
            # The block range spans 2 versions of the marketplace contract.
            batches.append({
                'contract_address': ORIGIN_MARKETPLACE_V0_CONTRACT_ADDRESS,
                'contract_version': '000',
                'from_block': from_block,
                'to_block': ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH - 1
            })
            batches.append({
                'contract_address': ORIGIN_MARKETPLACE_V1_CONTRACT_ADDRESS,
                'contract_version': '001',
                'from_block': ORIGIN_MARKETPLACE_V1_BLOCK_NUMBER_EPOCH,
                'to_block': to_block
            })

        for batch in batches:
            # https://github.com/ethereum/wiki/wiki/JSON-RPC#eth_getfilterlogs
            filter_params = {
                'address': batch['contract_address'],
                'fromBlock': batch['from_block'],
                'toBlock': batch['to_block']
            }
            event_filter = self.web3.eth.filter(filter_params)
            events = event_filter.get_all_entries()
            for event in events:
                log = self.receipt_log_mapper.web3_dict_to_receipt_log(event)
                listing, shop_products = self.event_extractor.extract_event_from_log(log, batch['contract_version'])
                if listing:
                    item = self.marketplace_listing_mapper.listing_to_dict(listing)
                    self.marketplace_listing_exporter.export_item(item)
                for product in shop_products:
                    item = self.shop_listing_mapper.product_to_dict(product)
                    self.shop_product_exporter.export_item(item)
            self.web3.eth.uninstallFilter(event_filter.filter_id)

    def _end(self):
        self.batch_work_executor.shutdown()
        self.marketplace_listing_exporter.close()
        self.shop_product_exporter.close()
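
The version-splitting logic above deserves a standalone illustration: a block range is either entirely on one side of the V1 deployment epoch, or it straddles the epoch and must be split into two sub-ranges. A minimal sketch of the same decision (the epoch value comes from the constants above; the helper name is made up for the example):

V1_EPOCH = 8582597

def split_range_at_epoch(from_block, to_block, epoch=V1_EPOCH):
    # Returns (version, from, to) tuples covering [from_block, to_block].
    if to_block < epoch or from_block >= epoch:
        version = '000' if to_block < epoch else '001'
        return [(version, from_block, to_block)]
    return [('000', from_block, epoch - 1), ('001', epoch, to_block)]

assert split_range_at_epoch(8582590, 8582600) == [('000', 8582590, 8582596), ('001', 8582597, 8582600)]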

ethereumetl/jobs/export_receipts_job.py

@@ -23,7 +23,7 @@
 import json

-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
 from ethereumetl.json_rpc_requests import generate_get_receipt_json_rpc
 from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
@@ -64,7 +64,7 @@ class ExportReceiptsJob(BaseJob):
     def _export_receipts(self, transaction_hashes):
         receipts_rpc = list(generate_get_receipt_json_rpc(transaction_hashes))
-        response = self.batch_web3_provider.make_request(json.dumps(receipts_rpc))
+        response = self.batch_web3_provider.make_batch_request(json.dumps(receipts_rpc))
         results = rpc_response_batch_to_results(response)
         receipts = [self.receipt_mapper.json_dict_to_receipt(result) for result in results]
         for receipt in receipts:

ethereumetl/jobs/export_token_transfers_job.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.mappers.token_transfer_mapper import EthTokenTransferMapper
 from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
 from ethereumetl.service.token_transfer_extractor import EthTokenTransferExtractor, TRANSFER_EVENT_TOPIC

ethereumetl/jobs/export_tokens_job.py

@@ -22,7 +22,7 @@
 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.mappers.token_mapper import EthTokenMapper
 from ethereumetl.service.eth_token_service import EthTokenService
@@ -46,8 +46,9 @@ class ExportTokensJob(BaseJob):
         for token_address in token_addresses:
             self._export_token(token_address)

-    def _export_token(self, token_address):
+    def _export_token(self, token_address, block_number=None):
         token = self.token_service.get_token(token_address)
+        token.block_number = block_number
         token_dict = self.token_mapper.token_to_dict(token)
         self.item_exporter.export_item(token_dict)

ethereumetl/jobs/export_traces_job.py

@@ -21,9 +21,14 @@
 # SOFTWARE.

 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
-from ethereumetl.utils import validate_range
+from blockchainetl.jobs.base_job import BaseJob
+from ethereumetl.mainnet_daofork_state_changes import DAOFORK_BLOCK_NUMBER
 from ethereumetl.mappers.trace_mapper import EthTraceMapper
+from ethereumetl.service.eth_special_trace_service import EthSpecialTraceService
 from ethereumetl.service.trace_id_calculator import calculate_trace_ids
 from ethereumetl.service.trace_status_calculator import calculate_trace_statuses
+from ethereumetl.utils import validate_range


 class ExportTracesJob(BaseJob):
@@ -34,7 +39,9 @@ class ExportTracesJob(BaseJob):
         batch_size,
         web3,
         item_exporter,
-        max_workers):
+        max_workers,
+        include_genesis_traces=False,
+        include_daofork_traces=False):
         validate_range(start_block, end_block)
         self.start_block = start_block
         self.end_block = end_block
@@ -47,6 +54,10 @@ class ExportTracesJob(BaseJob):
         self.trace_mapper = EthTraceMapper()

+        self.special_trace_service = EthSpecialTraceService()
+        self.include_genesis_traces = include_genesis_traces
+        self.include_daofork_traces = include_daofork_traces
+
     def _start(self):
         self.item_exporter.open()
@@ -63,14 +74,39 @@ class ExportTracesJob(BaseJob):
         assert len(block_number_batch) == 1
         block_number = block_number_batch[0]

+        all_traces = []
+
+        if self.include_genesis_traces and 0 in block_number_batch:
+            genesis_traces = self.special_trace_service.get_genesis_traces()
+            all_traces.extend(genesis_traces)
+
+        if self.include_daofork_traces and DAOFORK_BLOCK_NUMBER in block_number_batch:
+            daofork_traces = self.special_trace_service.get_daofork_traces()
+            all_traces.extend(daofork_traces)
+
         # TODO: Change to traceFilter when this issue is fixed
         # https://github.com/paritytech/parity-ethereum/issues/9822
         json_traces = self.web3.parity.traceBlock(block_number)

-        for json_trace in json_traces:
-            trace = self.trace_mapper.json_dict_to_trace(json_trace)
-            self.item_exporter.export_item(self.trace_mapper.trace_to_dict(trace))
+        if json_traces is None:
+            raise ValueError('Response from the node is None. Is the node fully synced?')
+
+        traces = [self.trace_mapper.json_dict_to_trace(json_trace) for json_trace in json_traces]
+        all_traces.extend(traces)
+
+        calculate_trace_statuses(all_traces)
+        calculate_trace_ids(all_traces)
+        calculate_trace_indexes(all_traces)
+
+        for trace in all_traces:
+            self.item_exporter.export_item(self.trace_mapper.trace_to_dict(trace))

     def _end(self):
         self.batch_work_executor.shutdown()
         self.item_exporter.close()

+
+def calculate_trace_indexes(traces):
+    # Only works if traces were originally ordered correctly, which is the case for Parity traces
+    for ind, trace in enumerate(traces):
+        trace.trace_index = ind
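
calculate_trace_indexes simply numbers traces in the order received, which is why its comment insists on correct original ordering. A tiny illustration with a stand-in trace object (the class here is made up for the demo):

class _Trace:
    trace_index = None

traces = [_Trace() for _ in range(3)]
for ind, trace in enumerate(traces):
    trace.trace_index = ind  # instance attribute shadows the class default
assert [t.trace_index for t in traces] == [0, 1, 2]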

ethereumetl/jobs/exporters/blocks_and_transactions_item_exporter.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 BLOCK_FIELDS_TO_EXPORT = [
     'number',
@@ -55,7 +55,8 @@ TRANSACTION_FIELDS_TO_EXPORT = [
     'value',
     'gas',
     'gas_price',
-    'input'
+    'input',
+    'block_timestamp'
 ]

ethereumetl/jobs/exporters/contracts_item_exporter.py

@@ -21,14 +21,15 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 FIELDS_TO_EXPORT = [
     'address',
     'bytecode',
     'function_sighashes',
     'is_erc20',
-    'is_erc721'
+    'is_erc721',
+    'block_number',
 ]

ethereumetl/jobs/exporters/geth_traces_item_exporter.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 FIELDS_TO_EXPORT = [
     'block_number',


@@ -0,0 +1,58 @@
from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

MARKETPLACE_FIELDS_TO_EXPORT = [
    'block_number',
    'log_index',
    'listing_id',
    'ipfs_hash',
    'listing_type',
    'category',
    'subcategory',
    'language',
    'title',
    'description',
    'price',
    'currency'
]

SHOP_FIELDS_TO_EXPORT = [
    'block_number',
    'log_index',
    'listing_id',
    'product_id',
    'ipfs_path',
    'external_id',
    'parent_external_id',
    'title',
    'description',
    'price',
    'currency',
    'option1',
    'option2',
    'option3',
    'image'
]


def origin_marketplace_listing_item_exporter(output):
    return CompositeItemExporter(
        filename_mapping={
            'origin_marketplace_listing': output
        },
        field_mapping={
            'origin_marketplace_listing': MARKETPLACE_FIELDS_TO_EXPORT
        }
    )


def origin_shop_product_item_exporter(output):
    return CompositeItemExporter(
        filename_mapping={
            'origin_shop_product': output
        },
        field_mapping={
            'origin_shop_product': SHOP_FIELDS_TO_EXPORT
        }
    )
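
A short usage sketch tying the exporter to the extraction code above, assuming CompositeItemExporter routes each item by its 'type' key to the mapped output file and writes only the configured fields (the output path and item values are illustrative):

exporter = origin_marketplace_listing_item_exporter('origin_marketplace_listings.csv')
exporter.open()
# Items are dicts, e.g. as produced by OriginMarketplaceListingMapper.listing_to_dict().
exporter.export_item({
    'type': 'origin_marketplace_listing',
    'block_number': 6436157,
    'listing_id': '000-1',
    'title': 'Example listing',
})
exporter.close()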

ethereumetl/jobs/exporters/receipts_and_logs_item_exporter.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 RECEIPT_FIELDS_TO_EXPORT = [
     'transaction_hash',

ethereumetl/jobs/exporters/token_transfers_item_exporter.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 FIELDS_TO_EXPORT = [
     'token_address',

ethereumetl/jobs/exporters/tokens_item_exporter.py

@@ -21,14 +21,15 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 FIELDS_TO_EXPORT = [
     'address',
     'symbol',
     'name',
     'decimals',
-    'total_supply'
+    'total_supply',
+    'block_number'
 ]

ethereumetl/jobs/exporters/traces_item_exporter.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

-from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
+from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter

 FIELDS_TO_EXPORT = [
     'block_number',
@@ -40,6 +40,8 @@ FIELDS_TO_EXPORT = [
     'subtraces',
     'trace_address',
     'error',
+    'status',
+    'trace_id',
 ]

ethereumetl/jobs/extract_contracts_job.py

@@ -0,0 +1,85 @@
# MIT License
#
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ethereumetl.domain.contract import EthContract
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
from blockchainetl.jobs.base_job import BaseJob
from ethereumetl.mappers.contract_mapper import EthContractMapper
from ethereumetl.service.eth_contract_service import EthContractService
from ethereumetl.utils import to_int_or_none


# Extract contracts
class ExtractContractsJob(BaseJob):
    def __init__(
            self,
            traces_iterable,
            batch_size,
            max_workers,
            item_exporter):
        self.traces_iterable = traces_iterable
        self.batch_work_executor = BatchWorkExecutor(batch_size, max_workers)
        self.item_exporter = item_exporter

        self.contract_service = EthContractService()
        self.contract_mapper = EthContractMapper()

    def _start(self):
        self.item_exporter.open()

    def _export(self):
        self.batch_work_executor.execute(self.traces_iterable, self._extract_contracts)

    def _extract_contracts(self, traces):
        for trace in traces:
            trace['status'] = to_int_or_none(trace.get('status'))
            trace['block_number'] = to_int_or_none(trace.get('block_number'))

        contract_creation_traces = [trace for trace in traces
                                    if trace.get('trace_type') == 'create' and trace.get('to_address') is not None
                                    and len(trace.get('to_address')) > 0 and trace.get('status') == 1]

        contracts = []
        for trace in contract_creation_traces:
            contract = EthContract()
            contract.address = trace.get('to_address')
            bytecode = trace.get('output')
            contract.bytecode = bytecode
            contract.block_number = trace.get('block_number')

            function_sighashes = self.contract_service.get_function_sighashes(bytecode)
            contract.function_sighashes = function_sighashes
            contract.is_erc20 = self.contract_service.is_erc20_contract(function_sighashes)
            contract.is_erc721 = self.contract_service.is_erc721_contract(function_sighashes)
            contracts.append(contract)

        for contract in contracts:
            self.item_exporter.export_item(self.contract_mapper.contract_to_dict(contract))

    def _end(self):
        self.batch_work_executor.shutdown()
        self.item_exporter.close()
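
The list comprehension above keeps only successful contract-creation traces. A compact illustration of the same filter, condensed to a truthiness check on to_address, with made-up trace dicts:

traces = [
    {'trace_type': 'create', 'to_address': '0xabc', 'status': 1},  # kept: successful creation
    {'trace_type': 'create', 'to_address': '0xdef', 'status': 0},  # dropped: creation reverted
    {'trace_type': 'call', 'to_address': '0xabc', 'status': 1},    # dropped: not a creation
]
kept = [t for t in traces
        if t.get('trace_type') == 'create' and t.get('to_address')
        and t.get('status') == 1]
assert [t['to_address'] for t in kept] == ['0xabc']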

ethereumetl/jobs/extract_geth_traces_job.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.mappers.trace_mapper import EthTraceMapper
 from ethereumetl.mappers.geth_trace_mapper import EthGethTraceMapper

ethereumetl/jobs/extract_token_transfers_job.py

@@ -21,7 +21,7 @@
 # SOFTWARE.

 from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
-from ethereumetl.jobs.base_job import BaseJob
+from blockchainetl.jobs.base_job import BaseJob
 from ethereumetl.mappers.token_transfer_mapper import EthTokenTransferMapper
 from ethereumetl.mappers.receipt_log_mapper import EthReceiptLogMapper
 from ethereumetl.service.token_transfer_extractor import EthTokenTransferExtractor

Some files were not shown because too many files have changed in this diff.