mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-11 06:38:17 -05:00
Compare commits
596 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbd57fc079 | ||
|
|
8204c0827d | ||
|
|
46b91a9ff2 | ||
|
|
b5fd64bdca | ||
|
|
d8547e9c7c | ||
|
|
7ef53859c1 | ||
|
|
e38d1c1f2f | ||
|
|
43fe6b49b3 | ||
|
|
db274c8a85 | ||
|
|
69247042a4 | ||
|
|
218e1e4356 | ||
|
|
5e0fc8cc75 | ||
|
|
77efda5106 | ||
|
|
ece0b7f422 | ||
|
|
b31b76a73a | ||
|
|
0cb7eb60b5 | ||
|
|
02943f7caf | ||
|
|
b844b95868 | ||
|
|
4d305a284f | ||
|
|
e161e6ef13 | ||
|
|
9b917b8ddd | ||
|
|
383caf8331 | ||
|
|
c61e91235f | ||
|
|
0e4b4a894b | ||
|
|
d58c1ebda7 | ||
|
|
f0bf07e60c | ||
|
|
efe7acdc13 | ||
|
|
20404eca9e | ||
|
|
435cbe0a74 | ||
|
|
b606e22cd5 | ||
|
|
4943b0b795 | ||
|
|
eed2068def | ||
|
|
313b4b1237 | ||
|
|
ad6149155e | ||
|
|
c55c0f68dc | ||
|
|
b031b04bc7 | ||
|
|
b314f1ed0c | ||
|
|
61eb2e6e21 | ||
|
|
9f62e7ecea | ||
|
|
4da7e7b23f | ||
|
|
de72ba3511 | ||
|
|
3aabf9aa54 | ||
|
|
284755bafc | ||
|
|
23133594e8 | ||
|
|
ca54ef6c4b | ||
|
|
836f30e198 | ||
|
|
1c6508f15d | ||
|
|
a4d6f8fcb1 | ||
|
|
bc79d7d9bf | ||
|
|
7fdcf0f7b7 | ||
|
|
d3330f7ddc | ||
|
|
1066ec9025 | ||
|
|
2a92ecbf31 | ||
|
|
c238e8b57b | ||
|
|
a27d2427e1 | ||
|
|
c18f78506c | ||
|
|
23bad940db | ||
|
|
0a52db4b8a | ||
|
|
9fd1f906f2 | ||
|
|
f08f93ddfe | ||
|
|
9e51c3b8d4 | ||
|
|
79d341ea45 | ||
|
|
9db1ff104a | ||
|
|
952a49ba4b | ||
|
|
aab122ebf3 | ||
|
|
438c9af751 | ||
|
|
3ec2af25e1 | ||
|
|
84101407c1 | ||
|
|
97a0275ced | ||
|
|
7cbfd0e533 | ||
|
|
94ebd3f3e9 | ||
|
|
c0fd158211 | ||
|
|
7529c43f4e | ||
|
|
ce906f0af1 | ||
|
|
eaf4bf0bf2 | ||
|
|
1a0a8cf0f8 | ||
|
|
f0e4302423 | ||
|
|
fb35431aa7 | ||
|
|
87b1669434 | ||
|
|
9678bb91c7 | ||
|
|
f4e2b57463 | ||
|
|
6599a438a0 | ||
|
|
f8a5f25376 | ||
|
|
de96e394ee | ||
|
|
a58fe4585d | ||
|
|
f8878ff320 | ||
|
|
993ebe67c8 | ||
|
|
f967d73a95 | ||
|
|
e8b0447a63 | ||
|
|
af2ef17832 | ||
|
|
161aa6e472 | ||
|
|
7c80c09500 | ||
|
|
3affbadac3 | ||
|
|
84518f70ae | ||
|
|
aae968cd4b | ||
|
|
6f44daf023 | ||
|
|
2da9d050f4 | ||
|
|
2939c0afbf | ||
|
|
2678a2a2e3 | ||
|
|
d801da96dd | ||
|
|
b876f2059e | ||
|
|
204bcb65f6 | ||
|
|
92c07982c4 | ||
|
|
b6dbf07dbf | ||
|
|
f0732961f5 | ||
|
|
8498a775da | ||
|
|
f0e98871a2 | ||
|
|
f7f192510b | ||
|
|
b1acfa3be7 | ||
|
|
372bf2cb16 | ||
|
|
45a089fe0c | ||
|
|
688ecdfa3f | ||
|
|
0f6234ade3 | ||
|
|
47308f4891 | ||
|
|
2c91a31061 | ||
|
|
956695b77b | ||
|
|
533f516296 | ||
|
|
d34b28e4bf | ||
|
|
3ed8b8bc3e | ||
|
|
e1f658bc36 | ||
|
|
aae2edb20b | ||
|
|
12851c17a5 | ||
|
|
f5115547a3 | ||
|
|
58f5d9020c | ||
|
|
f5fa89a916 | ||
|
|
262e5f65f1 | ||
|
|
6b64c2338b | ||
|
|
be64a901ab | ||
|
|
97e2749f2a | ||
|
|
ca9eb6696b | ||
|
|
6c3a0694a3 | ||
|
|
837c324448 | ||
|
|
7ef53acee0 | ||
|
|
119a54fca1 | ||
|
|
cb0f955c27 | ||
|
|
9725ff9122 | ||
|
|
a142542ef9 | ||
|
|
342c5df3bb | ||
|
|
d189e7a344 | ||
|
|
f8f22f93a1 | ||
|
|
f4403a7e3f | ||
|
|
4ee070627c | ||
|
|
7a337e724a | ||
|
|
ac812a0f36 | ||
|
|
1711d2e809 | ||
|
|
d251f21b04 | ||
|
|
dcdc776c1b | ||
|
|
59ddb23f45 | ||
|
|
64adeb77a8 | ||
|
|
caff3065f7 | ||
|
|
d5567bf343 | ||
|
|
26e940224b | ||
|
|
5efa6e0eb9 | ||
|
|
53c1b59c84 | ||
|
|
8c9d6a62cc | ||
|
|
d085d5a5a4 | ||
|
|
43227e54b2 | ||
|
|
00e63d2b83 | ||
|
|
d58e72974a | ||
|
|
817660199c | ||
|
|
50925fc94d | ||
|
|
e63e703390 | ||
|
|
8a87ba85e3 | ||
|
|
15ff2a2ecb | ||
|
|
e511dac818 | ||
|
|
64d16f581b | ||
|
|
898ce3f3bf | ||
|
|
da6cc6f653 | ||
|
|
53c74e9996 | ||
|
|
67e27a6536 | ||
|
|
3a28eb116d | ||
|
|
b80eac42a6 | ||
|
|
72dcfd4979 | ||
|
|
4bfa3e6ba4 | ||
|
|
1883a01e3f | ||
|
|
1883e5cdac | ||
|
|
8a49edcae3 | ||
|
|
ce2ce23ccd | ||
|
|
d1189ad721 | ||
|
|
c135afc4bc | ||
|
|
65feed595a | ||
|
|
e82a86ca7f | ||
|
|
ed31940391 | ||
|
|
a0689730e4 | ||
|
|
0beebb139d | ||
|
|
5dea830c16 | ||
|
|
37d89e9c9d | ||
|
|
baa79e74c9 | ||
|
|
db590188d1 | ||
|
|
87f5e45d17 | ||
|
|
b772ec7fd7 | ||
|
|
69bb6f9bb3 | ||
|
|
2a9e468c1e | ||
|
|
be1892dffa | ||
|
|
31fb4efc48 | ||
|
|
167b38b6bc | ||
|
|
7d47dd34d6 | ||
|
|
c6fbd10ef3 | ||
|
|
114cd60b5a | ||
|
|
1a0bac2e2c | ||
|
|
2a17fb67ad | ||
|
|
dba7adf8f1 | ||
|
|
75847dd6ba | ||
|
|
e3b83639c2 | ||
|
|
6bb0fffd38 | ||
|
|
b62a2f1b30 | ||
|
|
9d9c383ab8 | ||
|
|
79ad41aad9 | ||
|
|
38c2c1beec | ||
|
|
a582f73cd2 | ||
|
|
257da16c48 | ||
|
|
1b9c07862c | ||
|
|
0667b68cb6 | ||
|
|
28acabe45e | ||
|
|
f593053af3 | ||
|
|
8df7d901ee | ||
|
|
a2b678167b | ||
|
|
c4c9207474 | ||
|
|
289b9005a0 | ||
|
|
eefffb0aa6 | ||
|
|
967c1ad37a | ||
|
|
b0408582db | ||
|
|
8f93376232 | ||
|
|
de4380fb89 | ||
|
|
e0ca8f9a8c | ||
|
|
589cb06ef0 | ||
|
|
54d9220130 | ||
|
|
c2f24c6d18 | ||
|
|
fedf6e60a4 | ||
|
|
629aed5bc8 | ||
|
|
25fc768f39 | ||
|
|
42b96bcf7b | ||
|
|
cf80415fcf | ||
|
|
104576d5eb | ||
|
|
135a475d46 | ||
|
|
90afaabce6 | ||
|
|
55a9371b2b | ||
|
|
1a8ac0630f | ||
|
|
3d79a22370 | ||
|
|
d2b84bd643 | ||
|
|
1a212405ed | ||
|
|
a808330950 | ||
|
|
9ff51f993c | ||
|
|
f2f88e64c5 | ||
|
|
7ee3497431 | ||
|
|
170e7979fe | ||
|
|
5dd95554ef | ||
|
|
45c3baffe6 | ||
|
|
86bb20e9d1 | ||
|
|
8aa076bfb7 | ||
|
|
d9378e7d17 | ||
|
|
55332cde00 | ||
|
|
eaf6a8f9b6 | ||
|
|
040849c66b | ||
|
|
c2a878e175 | ||
|
|
083cbd6891 | ||
|
|
c7ffffa5a8 | ||
|
|
240982bac1 | ||
|
|
53fa461001 | ||
|
|
efeeb297df | ||
|
|
1e00335b71 | ||
|
|
e70698e8b5 | ||
|
|
5f41b1ef15 | ||
|
|
926c0afad1 | ||
|
|
47049e0697 | ||
|
|
1bacd89423 | ||
|
|
686107b313 | ||
|
|
4dba6a1e8c | ||
|
|
ecc4484034 | ||
|
|
b568101c9c | ||
|
|
d25bd078f3 | ||
|
|
cb5dcac8c0 | ||
|
|
e79c32e422 | ||
|
|
479d8ece72 | ||
|
|
b4a385e915 | ||
|
|
0e11db80f0 | ||
|
|
dbb7248206 | ||
|
|
de2a9ed5aa | ||
|
|
b3fab3c089 | ||
|
|
895bf818a2 | ||
|
|
d83fcd4307 | ||
|
|
d7283ba301 | ||
|
|
111633874a | ||
|
|
3c6291a873 | ||
|
|
48f11fc9e1 | ||
|
|
511b60ecfa | ||
|
|
fcf576f6bc | ||
|
|
15b0f683b9 | ||
|
|
742e78b7f7 | ||
|
|
68f6bec10b | ||
|
|
04b179aadf | ||
|
|
8d159a58c0 | ||
|
|
10087aecbb | ||
|
|
e340074ce6 | ||
|
|
a74f53f351 | ||
|
|
e61248e798 | ||
|
|
e78a856438 | ||
|
|
40b98215b6 | ||
|
|
c19bdf053f | ||
|
|
8ccb6dfe77 | ||
|
|
4ce02de2e0 | ||
|
|
56d232781a | ||
|
|
c5a67b0fd4 | ||
|
|
2498bf5560 | ||
|
|
4c0a06fc36 | ||
|
|
101f0dbd67 | ||
|
|
bc40a13ec6 | ||
|
|
1bca49b31f | ||
|
|
8df8407137 | ||
|
|
4958c1e264 | ||
|
|
60f5340754 | ||
|
|
c84a6d1195 | ||
|
|
04bc4a888b | ||
|
|
84886c7f48 | ||
|
|
c1e5691d1d | ||
|
|
16dfcb24ed | ||
|
|
8164ee105d | ||
|
|
ac866f6459 | ||
|
|
90c4982a6b | ||
|
|
ae131baa0e | ||
|
|
cb3ee69123 | ||
|
|
81374dea00 | ||
|
|
71364a4fea | ||
|
|
d612ba40b8 | ||
|
|
e853d4fd19 | ||
|
|
82045cc21c | ||
|
|
eeabd57b98 | ||
|
|
dec070e812 | ||
|
|
156b603cb0 | ||
|
|
141c82005a | ||
|
|
e0636bbb31 | ||
|
|
b65f37af7b | ||
|
|
1a6c417ab0 | ||
|
|
5a09102eb2 | ||
|
|
f05ce47b95 | ||
|
|
51927defc7 | ||
|
|
4b92d7b670 | ||
|
|
0c4342fe11 | ||
|
|
41f20435a2 | ||
|
|
676dfb22c5 | ||
|
|
7cf0f34785 | ||
|
|
d46528ba24 | ||
|
|
87e6b57024 | ||
|
|
70db781856 | ||
|
|
f5836345cd | ||
|
|
d7ac8fb758 | ||
|
|
ded7a6a007 | ||
|
|
dd8d2bdc38 | ||
|
|
093fe56dde | ||
|
|
68fce399a8 | ||
|
|
438b911b0f | ||
|
|
dae8deff36 | ||
|
|
cb84071680 | ||
|
|
d882c64671 | ||
|
|
477eb35a39 | ||
|
|
ab7fd89774 | ||
|
|
064353a993 | ||
|
|
94b7ce8a4c | ||
|
|
c8e4c840d5 | ||
|
|
136ed3232a | ||
|
|
cf8c6edfb7 | ||
|
|
e90e70e94f | ||
|
|
64614c2670 | ||
|
|
1ffb592771 | ||
|
|
030c460f36 | ||
|
|
92db79b8a7 | ||
|
|
2d37486970 | ||
|
|
499596ad3e | ||
|
|
106de42844 | ||
|
|
aa106467b8 | ||
|
|
e53dbe13f9 | ||
|
|
38752a557a | ||
|
|
e8b6fe742e | ||
|
|
d1e2f83071 | ||
|
|
69c64e048e | ||
|
|
1d4aa94d81 | ||
|
|
2b23e08a64 | ||
|
|
7434d149bb | ||
|
|
eab288d507 | ||
|
|
091c7edd60 | ||
|
|
0373f48956 | ||
|
|
32eae84170 | ||
|
|
359fe17ac3 | ||
|
|
19daa86e52 | ||
|
|
e428bead6d | ||
|
|
ee5de4b465 | ||
|
|
ee8c68d215 | ||
|
|
76cdec4a5c | ||
|
|
7d9892de85 | ||
|
|
faffca21ef | ||
|
|
a74ab02563 | ||
|
|
8daa06d007 | ||
|
|
2ab3b7e9bf | ||
|
|
3234f64c45 | ||
|
|
437718083e | ||
|
|
0f28aee915 | ||
|
|
5e311b87da | ||
|
|
fdea8ca36e | ||
|
|
ca8cd55223 | ||
|
|
f4586b1501 | ||
|
|
f49b46363e | ||
|
|
40d4cf374c | ||
|
|
031c5acedf | ||
|
|
f4718a6cb9 | ||
|
|
f35b4ecde4 | ||
|
|
8257c4bde5 | ||
|
|
8b21e34250 | ||
|
|
e8ea43067a | ||
|
|
e695c55704 | ||
|
|
5c941a403e | ||
|
|
67b9ef1728 | ||
|
|
3d5c5a3c73 | ||
|
|
fa81a41ae5 | ||
|
|
fcd963ced6 | ||
|
|
e69148ca9e | ||
|
|
143f59018f | ||
|
|
b46717bf2b | ||
|
|
66971c82e8 | ||
|
|
040a42dba5 | ||
|
|
2e0b59553c | ||
|
|
26bcb6c9d8 | ||
|
|
e82618d1c2 | ||
|
|
e6c055c3fa | ||
|
|
925471b064 | ||
|
|
af72640c37 | ||
|
|
a44637f430 | ||
|
|
a446b55453 | ||
|
|
9072abf55d | ||
|
|
c6118be5a5 | ||
|
|
4ed17d4980 | ||
|
|
1bf2553aed | ||
|
|
04b34c5dd5 | ||
|
|
9614aeba7f | ||
|
|
eba4e4e58e | ||
|
|
c5d155b617 | ||
|
|
418b7a83d3 | ||
|
|
4fccd2c181 | ||
|
|
f07752907a | ||
|
|
140af3e649 | ||
|
|
c9fa2a1873 | ||
|
|
7214d771b9 | ||
|
|
a2a48f9642 | ||
|
|
ad8fda002e | ||
|
|
99803a772e | ||
|
|
1defa289e5 | ||
|
|
7f725182aa | ||
|
|
7afe6093b0 | ||
|
|
4465222622 | ||
|
|
2f8d901829 | ||
|
|
e27b5c28fd | ||
|
|
47bd5957d4 | ||
|
|
edc3211544 | ||
|
|
a9ee19f871 | ||
|
|
c5ea25a200 | ||
|
|
81033022b9 | ||
|
|
ac60502f72 | ||
|
|
9dfff1261d | ||
|
|
69cc8a70c0 | ||
|
|
ba60c906f5 | ||
|
|
751f9b57ac | ||
|
|
a9672ac9c1 | ||
|
|
ea6d0e87da | ||
|
|
22e6795789 | ||
|
|
302fbc9947 | ||
|
|
3483d77aa4 | ||
|
|
871af57840 | ||
|
|
c76d25bf3f | ||
|
|
2c3ece7010 | ||
|
|
930efe5a0e | ||
|
|
aac00bf7d0 | ||
|
|
6f19ff0756 | ||
|
|
f18f303fa9 | ||
|
|
b5e290e2c1 | ||
|
|
a10fb2fac9 | ||
|
|
83a7b5383f | ||
|
|
978513efc0 | ||
|
|
65f5de1df1 | ||
|
|
df10702486 | ||
|
|
a288b51b73 | ||
|
|
a6337d0817 | ||
|
|
d63713ece1 | ||
|
|
ed2466d16d | ||
|
|
aab657da9b | ||
|
|
79b9a46bae | ||
|
|
cac7305f53 | ||
|
|
80cd37bdde | ||
|
|
ff4218c0b8 | ||
|
|
f50cc7253b | ||
|
|
4fc495342b | ||
|
|
b0a5e02dd5 | ||
|
|
f7af95d6c7 | ||
|
|
706eb8a9c9 | ||
|
|
e30e58f032 | ||
|
|
3b866f4f32 | ||
|
|
d437f58eb9 | ||
|
|
ecea237187 | ||
|
|
aa1a0ee32a | ||
|
|
4c3d67d442 | ||
|
|
061f131919 | ||
|
|
1e793f3d48 | ||
|
|
3876957917 | ||
|
|
76879e593d | ||
|
|
f9b353d803 | ||
|
|
fb2c7fb149 | ||
|
|
21808fb1c8 | ||
|
|
a4a15cb534 | ||
|
|
04aa34dca4 | ||
|
|
5c98d95a5a | ||
|
|
49faafa3e0 | ||
|
|
eb69307ddb | ||
|
|
c8202d9533 | ||
|
|
01c1792ca5 | ||
|
|
32e7f593be | ||
|
|
538d841906 | ||
|
|
3050f50893 | ||
|
|
49c6f042d7 | ||
|
|
320f592e51 | ||
|
|
c0c8fd5845 | ||
|
|
7b9276c5a2 | ||
|
|
e5e15b262d | ||
|
|
4092ce92b9 | ||
|
|
819f26e09e | ||
|
|
b500542437 | ||
|
|
652193a2f2 | ||
|
|
01d7ece2f0 | ||
|
|
d93fcbcbf7 | ||
|
|
b94c5ff9e2 | ||
|
|
379dfce791 | ||
|
|
459a9f1950 | ||
|
|
8fb5e3f15f | ||
|
|
51fc2cf86e | ||
|
|
436b64cee3 | ||
|
|
d7f9056e3c | ||
|
|
f516cbed57 | ||
|
|
5f0f111b36 | ||
|
|
2453db08f0 | ||
|
|
336161b484 | ||
|
|
59754c3598 | ||
|
|
c36e2acbbe | ||
|
|
69511bc21c | ||
|
|
6ab4ffd0c7 | ||
|
|
ba9cac62b5 | ||
|
|
d920e2ea68 | ||
|
|
3da713742f | ||
|
|
59e33dec92 | ||
|
|
acd50af6fa | ||
|
|
6c67a49c92 | ||
|
|
98f349e236 | ||
|
|
7439eb611d | ||
|
|
b90c8688d8 | ||
|
|
79394d72bc | ||
|
|
9b472d6f64 | ||
|
|
eb38bde281 | ||
|
|
b027892ed6 | ||
|
|
616c905fd1 | ||
|
|
f4666947f2 | ||
|
|
ae246fec43 | ||
|
|
b602be97fd | ||
|
|
a04187df93 | ||
|
|
481e764107 | ||
|
|
8cd6c89aa9 | ||
|
|
0b3f4d6be1 | ||
|
|
cdbc554e77 | ||
|
|
6b9af1e8df | ||
|
|
8fe091d8f3 | ||
|
|
9cc0743a25 | ||
|
|
7a79f42a9a | ||
|
|
3f803cf88e | ||
|
|
978ccb219d | ||
|
|
d442e462e1 | ||
|
|
7ecdfa4fb7 | ||
|
|
ae7337cd6d | ||
|
|
2f0d3bff35 | ||
|
|
572a42ba12 | ||
|
|
138c7e3ce6 | ||
|
|
10e95f19d0 | ||
|
|
6ddb96dd36 | ||
|
|
c6ad3c355e | ||
|
|
b26c6a31dc | ||
|
|
1db56b7a69 | ||
|
|
9b315926d4 | ||
|
|
dc5488803c | ||
|
|
48a1056238 | ||
|
|
1fdde0c7c0 | ||
|
|
da68fe948b | ||
|
|
cc3ed86f3b | ||
|
|
60017a5abe | ||
|
|
8cc869694d | ||
|
|
3fbf70fb4f | ||
|
|
f7e7e55441 | ||
|
|
d677d442bd | ||
|
|
7a47d93d9e | ||
|
|
e102f76631 | ||
|
|
9bd9d4347b | ||
|
|
54494aef6c | ||
|
|
c4c3ccc79a |
4
.dockerignore
Normal file
4
.dockerignore
Normal file
@@ -0,0 +1,4 @@
|
||||
.*
|
||||
last_synced_block.txt
|
||||
pid.txt
|
||||
output
|
||||
20
.github/workflows/publish-to-dockerhub.yml
vendored
Normal file
20
.github/workflows/publish-to-dockerhub.yml
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
name: Publish DockerHub
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Publish to DockerHub
|
||||
if: startsWith(github.event.ref, 'refs/tags/v')
|
||||
uses: elgohr/Publish-Docker-Github-Action@master
|
||||
with:
|
||||
name: blockchainetl/ethereum-etl
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
tag_semver: true
|
||||
30
.github/workflows/publish-to-pypi.yml
vendored
Normal file
30
.github/workflows/publish-to-pypi.yml
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
name: Publish to PyPI and TestPyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
build-n-publish:
|
||||
name: Build and publish to PyPI and TestPyPI
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Set up Python 3.7
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 3.7
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python setup.py sdist
|
||||
- name: Publish distribution to Test PyPI
|
||||
if: startsWith(github.event.ref, 'refs/tags/v')
|
||||
uses: pypa/gh-action-pypi-publish@master
|
||||
with:
|
||||
password: ${{ secrets.test_pypi_password }}
|
||||
repository_url: https://test.pypi.org/legacy/
|
||||
- name: Publish distribution to PyPI
|
||||
if: startsWith(github.event.ref, 'refs/tags/v')
|
||||
uses: pypa/gh-action-pypi-publish@master
|
||||
with:
|
||||
password: ${{ secrets.pypi_password }}
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -47,3 +47,6 @@ coverage.xml
|
||||
.venv
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# etl
|
||||
/last_synced_block.txt
|
||||
14
.readthedocs.yaml
Normal file
14
.readthedocs.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Read the Docs configuration file for MkDocs projects
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Set the version of Python and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
mkdocs:
|
||||
configuration: mkdocs.yml
|
||||
15
.travis.yml
15
.travis.yml
@@ -1,7 +1,14 @@
|
||||
language: python
|
||||
python:
|
||||
- "3.6"
|
||||
dist: xenial
|
||||
matrix:
|
||||
include:
|
||||
- python: "3.7.2"
|
||||
env: TOX_POSARGS="-e py37"
|
||||
- python: "3.8"
|
||||
env: TOX_POSARGS="-e py38"
|
||||
- python: "3.9"
|
||||
env: TOX_POSARGS="-e py39"
|
||||
install:
|
||||
- travis_retry pip install -r requirements.txt
|
||||
- travis_retry pip install tox
|
||||
script:
|
||||
- pytest -vv
|
||||
- travis_wait tox $TOX_POSARGS
|
||||
15
Dockerfile
15
Dockerfile
@@ -1,12 +1,15 @@
|
||||
FROM python:3.6-alpine
|
||||
MAINTAINER Eric Lim <elim0322@gmail.com>
|
||||
FROM python:3.7
|
||||
MAINTAINER Evgeny Medvedev <evge.medvedev@gmail.com>
|
||||
ENV PROJECT_DIR=ethereum-etl
|
||||
|
||||
RUN mkdir /$PROJECT_DIR
|
||||
WORKDIR /$PROJECT_DIR
|
||||
COPY requirements.txt .
|
||||
RUN apk add --no-cache gcc musl-dev #for C libraries: <limits.h> <stdio.h>
|
||||
RUN pip install --upgrade pip && pip install -r /$PROJECT_DIR/requirements.txt
|
||||
COPY . .
|
||||
RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/[streaming]
|
||||
|
||||
ENTRYPOINT ["python", "export_all.py"]
|
||||
# Add Tini
|
||||
ENV TINI_VERSION v0.18.0
|
||||
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
|
||||
RUN chmod +x /tini
|
||||
|
||||
ENTRYPOINT ["/tini", "--", "python", "ethereumetl"]
|
||||
|
||||
4
LICENSE
4
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
Copyright (c) 2018-2025 Evgeny Medvedev, evge.medvedev@gmail.com, https://twitter.com/EvgeMedvedev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
||||
507
README.md
507
README.md
@@ -1,460 +1,123 @@
|
||||
# Ethereum ETL
|
||||
|
||||
[](https://gitter.im/ethereum-etl/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://travis-ci.org/medvedev1088/ethereum-etl)
|
||||
[](https://travis-ci.com/github/blockchain-etl/ethereum-etl)
|
||||
[](https://github.com/blockchain-etl/ethereum-etl/blob/develop/LICENSE)
|
||||
[](https://t.me/BlockchainETL)
|
||||
[](https://x.com/EthereumETL)
|
||||
|
||||
Export blocks and transactions ([Schema](#blockscsv), [Reference](#export_blocks_and_transactionspy)):
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
*Do you just want to query Ethereum data right away? Use the [public dataset in BigQuery](https://console.cloud.google.com/marketplace/details/ethereum/crypto-ethereum-blockchain).*
|
||||
|
||||
[Full documentation available here](http://ethereum-etl.readthedocs.io/).
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install Ethereum ETL:
|
||||
|
||||
```bash
|
||||
> python export_blocks_and_transactions.py --start-block 0 --end-block 500000 \
|
||||
--provider-uri https://mainnet.infura.io --blocks-output blocks.csv --transactions-output transactions.csv
|
||||
pip3 install ethereum-etl
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers ([Schema](#token_transferscsv), [Reference](#export_token_transferspy)):
|
||||
Export blocks and transactions ([Schema](docs/schema.md#blockscsv), [Reference](docs/commands.md#export_blocks_and_transactions)):
|
||||
|
||||
```bash
|
||||
> python export_token_transfers.py --start-block 0 --end-block 500000 \
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--blocks-output blocks.csv --transactions-output transactions.csv \
|
||||
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers ([Schema](docs/schema.md#token_transferscsv), [Reference](docs/commands.md#export_token_transfers)):
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
|
||||
```
|
||||
|
||||
Export receipts and logs ([Schema](#receiptscsv), [Reference](#export_receipts_and_logspy)):
|
||||
Export traces ([Schema](docs/schema.md#tracescsv), [Reference](docs/commands.md#export_traces)):
|
||||
|
||||
```bash
|
||||
> python export_receipts_and_logs.py --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri https://mainnet.infura.io --receipts-output receipts.csv --logs-output logs.csv
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 token details ([Schema](#tokenscsv), [Reference](#export_tokenspy)):
|
||||
---
|
||||
|
||||
Stream blocks, transactions, logs, token_transfers continually to console ([Reference](docs/commands.md#stream)):
|
||||
|
||||
```bash
|
||||
> python export_tokens.py --token-addresses token_addresses.csv \
|
||||
--provider-uri https://mainnet.infura.io --output tokens.csv
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt \
|
||||
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c
|
||||
```
|
||||
|
||||
[LIMITATIONS](#limitations)
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Schema](#schema)
|
||||
- [blocks.csv](#blockscsv)
|
||||
- [transactions.csv](#transactionscsv)
|
||||
- [token_transfers.csv](#token_transferscsv)
|
||||
- [receipts.csv](#receiptscsv)
|
||||
- [logs.csv](#logscsv)
|
||||
- [contracts.csv](#contractscsv)
|
||||
- [tokens.csv](#tokenscsv)
|
||||
- [Exporting the Blockchain](#exporting-the-blockchain)
|
||||
- [Export in 2 Hours](#export-in-2-hours)
|
||||
- [Command Reference](#command-reference)
|
||||
- [Querying in Amazon Athena](#querying-in-amazon-athena)
|
||||
- [Querying in Google BigQuery](#querying-in-google-bigquery)
|
||||
- [Public Dataset](#public-dataset)
|
||||
|
||||
|
||||
## Schema
|
||||
|
||||
### blocks.csv
|
||||
|
||||
Column | Type |
|
||||
------------------------|--------------------|
|
||||
number | bigint |
|
||||
hash | hex_string |
|
||||
parent_hash | hex_string |
|
||||
nonce | hex_string |
|
||||
sha3_uncles | hex_string |
|
||||
logs_bloom | hex_string |
|
||||
transactions_root | hex_string |
|
||||
state_root | hex_string |
|
||||
receipts_root | hex_string |
|
||||
miner | address |
|
||||
difficulty | numeric |
|
||||
total_difficulty | numeric |
|
||||
size | bigint |
|
||||
extra_data | hex_string |
|
||||
gas_limit | bigint |
|
||||
gas_used | bigint |
|
||||
timestamp | bigint |
|
||||
transaction_count | bigint |
|
||||
|
||||
### transactions.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
hash | hex_string |
|
||||
nonce | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
transaction_index| bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
gas | bigint |
|
||||
gas_price | bigint |
|
||||
input | hex_string |
|
||||
|
||||
### token_transfers.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
token_address | address |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
transaction_hash | hex_string |
|
||||
log_index | bigint |
|
||||
block_number | bigint |
|
||||
|
||||
### receipts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
cumulative_gas_used | bigint |
|
||||
gas_used | bigint |
|
||||
contract_address | address |
|
||||
root | hex_string |
|
||||
status | bigint |
|
||||
|
||||
### logs.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
log_index | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
address | address |
|
||||
data | hex_string |
|
||||
topics | string |
|
||||
|
||||
### contracts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
bytecode | hex_string |
|
||||
function_sighashes | string |
|
||||
is_erc20 | boolean |
|
||||
is_erc721 | boolean |
|
||||
|
||||
### tokens.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
symbol | string |
|
||||
name | string |
|
||||
decimals | bigint |
|
||||
total_supply | numeric |
|
||||
|
||||
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
|
||||
|
||||
Note: for the `address` type all hex characters are lower-cased.
|
||||
`boolean` type can have 2 values: `True` or `False`.
|
||||
|
||||
## LIMITATIONS
|
||||
|
||||
- `contracts.csv` and `tokens.csv` files don’t include contracts created by message calls (a.k.a. internal transactions).
|
||||
We are working on adding support for those.
|
||||
- If the contract is a proxy that forwards all calls to a delegate, interface detection doesn’t work,
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also, some contracts (EOS) implement these methods but with the wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
`cast(value as FLOAT64)` (possible loss of precision) or
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- Contracts that don't implement the `decimals()` function but have a
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
|
||||
|
||||
|
||||
## Exporting the Blockchain
|
||||
|
||||
1. Install Python 3.6 from https://www.python.org/downloads/ (3.5 and 3.7 are not currently supported by this tool)
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support the `eth_getFilterLogs` JSON-RPC method).
|
||||
For that, use the `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
|
||||
export the data ~40 times faster, you will need to set up a local Ethereum node:
|
||||
|
||||
1. Install geth https://github.com/ethereum/go-ethereum/wiki/Installing-Geth
|
||||
|
||||
1. Start geth.
|
||||
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
|
||||
You can export blocks below `currentBlock`,
|
||||
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
|
||||
and token details; for those you need to wait until the full sync).
|
||||
|
||||
1. Clone Ethereum ETL and install the dependencies:
|
||||
|
||||
```bash
|
||||
> git clone https://github.com/medvedev1088/ethereum-etl.git
|
||||
> cd ethereum-etl
|
||||
> pip install -r requirements.txt
|
||||
```
|
||||
|
||||
1. Export all:
|
||||
|
||||
```bash
|
||||
> ./export_all.sh -h
|
||||
Usage: ./export_all.sh -s <start_block> -e <end_block> -b <batch_size> -p <provider_uri> [-o <output_dir>]
|
||||
> ./export_all.sh -s 0 -e 5499999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
|
||||
```
|
||||
|
||||
The result will be in the `output` subdirectory, partitioned in Hive style:
|
||||
|
||||
```bash
|
||||
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
|
||||
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
|
||||
...
|
||||
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
|
||||
...
|
||||
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
|
||||
...
|
||||
```
|
||||
|
||||
It should work with geth and Parity on Linux, macOS, and Windows.
|
||||
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
|
||||
does not place all of the block or receipt data into the database https://wiki.parity.io/Getting-Synced
|
||||
Tested with Python 3.6, geth 1.8.7, Ubuntu 16.04.4
|
||||
|
||||
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
|
||||
check this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
|
||||
|
||||
#### Export in 2 Hours
|
||||
|
||||
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
|
||||
Read this article for details https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2
|
||||
|
||||
#### Running in Windows
|
||||
|
||||
Additional steps:
|
||||
|
||||
1. Install Visual C++ Build Tools https://landinghub.visualstudio.com/visual-cpp-build-tools
|
||||
|
||||
1. Install Git Bash with Git for Windows https://git-scm.com/download/win
|
||||
|
||||
1. Run in Git Bash:
|
||||
|
||||
```bash
|
||||
> ./export_all.sh -s 0 -e 999999 -b 100000 -p 'file:\\\\.\pipe\geth.ipc' -o output
|
||||
```
|
||||
|
||||
#### Running in Docker
|
||||
|
||||
1. Install Docker https://docs.docker.com/install/
|
||||
|
||||
1. Build a docker image
|
||||
```bash
|
||||
> docker build -t ethereum-etl:latest .
|
||||
> docker image ls
|
||||
```
|
||||
|
||||
1. Run a container out of the image
|
||||
```bash
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest -s 2018-01-01 -e 2018-01-01 -b 100000 -p https://mainnet.infura.io
|
||||
```
|
||||
|
||||
#### Command Reference
|
||||
|
||||
- [export_blocks_and_transactions.py](#export_blocks_and_transactionspy)
|
||||
- [export_token_transfers.py](#export_token_transferspy)
|
||||
- [extract_token_transfers.py](#extract_token_transferspy)
|
||||
- [export_receipts_and_logs.py](#export_receipts_and_logspy)
|
||||
- [export_contracts.py](#export_contractspy)
|
||||
- [export_tokens.py](#export_tokenspy)
|
||||
- [get_block_range_for_date.py](#get_block_range_for_datepy)
|
||||
- [get_keccak_hash.py](#get_keccak_hashpy)
|
||||
|
||||
All the commands accept `-h` parameter for help, e.g.:
|
||||
Find other commands [here](https://ethereum-etl.readthedocs.io/en/latest/commands/).
|
||||
|
||||
For the latest version, check out the repo and call
|
||||
```bash
|
||||
> python export_blocks_and_transactions.py -h
|
||||
|
||||
usage: export_blocks_and_transactions.py [-h] [-s START_BLOCK] -e END_BLOCK
|
||||
[-b BATCH_SIZE] --provider-uri PROVIDER_URI
|
||||
[-w MAX_WORKERS]
|
||||
[--blocks-output BLOCKS_OUTPUT]
|
||||
[--transactions-output TRANSACTIONS_OUTPUT]
|
||||
|
||||
Export blocks and transactions.
|
||||
> pip3 install -e .
|
||||
> python3 ethereumetl.py
|
||||
```
|
||||
|
||||
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
|
||||
## Useful Links
|
||||
|
||||
##### export_blocks_and_transactions.py
|
||||
|
||||
```bash
|
||||
> python export_blocks_and_transactions.py --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
##### export_token_transfers.py
|
||||
|
||||
The API used in this command is not supported by Infura, so you will need a local node.
|
||||
If you want to use Infura for exporting ERC20 transfers refer to [extract_token_transfers.py](#extract_token_transferspy)
|
||||
|
||||
```bash
|
||||
> python export_token_transfers.py --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
|
||||
```
|
||||
|
||||
Include `--tokens <token1> <token2>` to filter only certain tokens, e.g.
|
||||
|
||||
```bash
|
||||
> python export_token_transfers.py --start-block 0 --end-block 500000 --provider-uri file://$HOME/Library/Ethereum/geth.ipc \
|
||||
--output token_transfers.csv --tokens 0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0 0x06012c8cf97bead5deae237070f9587f8e7a266d
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
##### export_receipts_and_logs.py
|
||||
|
||||
First extract transaction hashes from `transactions.csv`
|
||||
(Exported with [export_blocks_and_transactions.py](#export_blocks_and_transactionspy)):
|
||||
|
||||
```bash
|
||||
> python extract_csv_column.py --input transactions.csv --column transaction_hash --output transaction_hashes.txt
|
||||
```
|
||||
|
||||
Then export receipts and logs:
|
||||
|
||||
```bash
|
||||
> python export_receipts_and_logs.py --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
|
||||
```
|
||||
|
||||
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
Upvote this feature request https://github.com/paritytech/parity/issues/9075,
|
||||
it will make receipts and logs export much faster.
|
||||
|
||||
##### extract_token_transfers.py
|
||||
|
||||
First export receipt logs with [export_receipts_and_logs.py](#export_receipts_and_logspy).
|
||||
|
||||
Then extract transfers from the logs.csv file:
|
||||
|
||||
```bash
|
||||
> python extract_token_transfers.py --logs logs.csv --output token_transfers.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
##### export_contracts.py
|
||||
|
||||
First extract contract addresses from `receipts.csv`
|
||||
(Exported with [export_receipts_and_logs.py](#export_receipts_and_logspy)):
|
||||
|
||||
```bash
|
||||
> python extract_csv_column.py --input receipts.csv --column contract_address --output contract_addresses.txt
|
||||
```
|
||||
|
||||
Then export contracts:
|
||||
|
||||
```bash
|
||||
> python export_contracts.py --contract-addresses contract_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
##### export_tokens.py
|
||||
|
||||
First extract token addresses from `contracts.json`
|
||||
(Exported with [export_contracts.py](#export_contractspy)):
|
||||
|
||||
```bash
|
||||
> python filter_items.py -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
|
||||
python extract_field.py -f address -o token_addresses.txt
|
||||
```
|
||||
|
||||
Then export ERC20 / ERC721 tokens:
|
||||
|
||||
```bash
|
||||
> python export_tokens.py --token-addresses token_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
|
||||
```
|
||||
|
||||
You can tune `--max-workers` for performance.
|
||||
|
||||
##### get_block_range_for_date.py
|
||||
|
||||
```bash
|
||||
> python get_block_range_for_date.py --provider-uri=https://mainnet.infura.io --date 2018-01-01
|
||||
4832686,4838611
|
||||
```
|
||||
|
||||
##### get_keccak_hash.py
|
||||
|
||||
```bash
|
||||
> python get_keccak_hash.py -i "transfer(address,uint256)"
|
||||
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
|
||||
```
|
||||
|
||||
#### Running Tests
|
||||
- [Schema](https://ethereum-etl.readthedocs.io/en/latest/schema/)
|
||||
- [Command Reference](https://ethereum-etl.readthedocs.io/en/latest/commands/)
|
||||
- [Documentation](https://ethereum-etl.readthedocs.io/)
|
||||
- [Public Datasets in BigQuery](https://github.com/blockchain-etl/public-datasets)
|
||||
- [Exporting the Blockchain](https://ethereum-etl.readthedocs.io/en/latest/exporting-the-blockchain/)
|
||||
- [Querying in Amazon Athena](https://ethereum-etl.readthedocs.io/en/latest/amazon-athena/)
|
||||
- [Querying in Google BigQuery](https://ethereum-etl.readthedocs.io/en/latest/google-bigquery/)
|
||||
- [Querying in Kaggle](https://www.kaggle.com/bigquery/ethereum-blockchain)
|
||||
- [Airflow DAGs](https://github.com/blockchain-etl/ethereum-etl-airflow)
|
||||
- [Postgres ETL](https://github.com/blockchain-etl/ethereum-etl-postgresql)
|
||||
- [Ethereum 2.0 ETL](https://github.com/blockchain-etl/ethereum2-etl)
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
> pip3 install -e .[dev,streaming]
|
||||
> export ETHEREUM_ETL_RUN_SLOW_TESTS=True
|
||||
> export PROVIDER_URL=<your_provider_uri>
|
||||
> pytest -vv
|
||||
```
|
||||
```
|
||||
|
||||
## Querying in Amazon Athena
|
||||
|
||||
- Upload the files to S3:
|
||||
### Running Tox Tests
|
||||
|
||||
```bash
|
||||
> cd output
|
||||
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
|
||||
> pip3 install tox
|
||||
> tox
|
||||
```
|
||||
|
||||
- Sign in to Athena https://console.aws.amazon.com/athena/home
|
||||
## Running in Docker
|
||||
|
||||
- Create a database:
|
||||
1. Install Docker: https://docs.docker.com/get-docker/
|
||||
|
||||
```sql
|
||||
CREATE DATABASE ethereumetl;
|
||||
2. Build a docker image
|
||||
|
||||
> docker build -t ethereum-etl:latest .
|
||||
> docker image ls
|
||||
|
||||
3. Run a container out of the image
|
||||
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 0 -e 5499999 -b 100000 -p https://mainnet.infura.io
|
||||
> docker run -v $HOME/output:/ethereum-etl/output ethereum-etl:latest export_all -s 2018-01-01 -e 2018-01-01 -p https://mainnet.infura.io
|
||||
|
||||
4. Run streaming to console or Pub/Sub
|
||||
|
||||
> docker build -t ethereum-etl:latest .
|
||||
> echo "Stream to console"
|
||||
> docker run ethereum-etl:latest stream --start-block 500000 --log-file log.txt
|
||||
> echo "Stream to Pub/Sub"
|
||||
> docker run -v /path_to_credentials_file/:/ethereum-etl/ --env GOOGLE_APPLICATION_CREDENTIALS=/ethereum-etl/credentials_file.json ethereum-etl:latest stream --start-block 500000 --output projects/<your_project>/topics/crypto_ethereum
|
||||
|
||||
If running on an Apple M1 chip, add the `--platform linux/x86_64` option to the `build` and `run` commands, e.g.:
|
||||
|
||||
```
|
||||
docker build --platform linux/x86_64 -t ethereum-etl:latest .
|
||||
docker run --platform linux/x86_64 ethereum-etl:latest stream --start-block 500000
|
||||
```
|
||||
|
||||
- Create the tables:
|
||||
- blocks: [schemas/aws/blocks.sql](schemas/aws/blocks.sql)
|
||||
- transactions: [schemas/aws/transactions.sql](schemas/aws/transactions.sql)
|
||||
- token_transfers: [schemas/aws/token_transfers.sql](schemas/aws/token_transfers.sql)
|
||||
- contracts: [schemas/aws/contracts.sql](schemas/aws/contracts.sql)
|
||||
- receipts: [schemas/aws/receipts.sql](schemas/aws/receipts.sql)
|
||||
- logs: [schemas/aws/logs.sql](schemas/aws/logs.sql)
|
||||
- tokens: [schemas/aws/tokens.sql](schemas/aws/tokens.sql)
|
||||
|
||||
### Tables for Parquet Files
|
||||
|
||||
Read this article on how to convert CSVs to Parquet https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30
|
||||
|
||||
- Create the tables:
|
||||
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](schemas/aws/parquet/parquet_blocks.sql)
|
||||
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](schemas/aws/parquet/parquet_transactions.sql)
|
||||
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](schemas/aws/parquet/parquet_token_transfers.sql)
|
||||
|
||||
Note that DECIMAL type is limited to 38 digits in Hive https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal
|
||||
so values with more than 38 digits will be null.
|
||||
|
||||
## Querying in Google BigQuery
|
||||
|
||||
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
|
||||
|
||||
### Public Dataset
|
||||
|
||||
You can query the data that's updated daily in the public BigQuery dataset
|
||||
https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579
|
||||
## Projects using Ethereum ETL
|
||||
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
|
||||
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum
|
||||
|
||||
0
blockchainetl/__init__.py
Normal file
0
blockchainetl/__init__.py
Normal file
35
blockchainetl/atomic_counter.py
Normal file
35
blockchainetl/atomic_counter.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import itertools
|
||||
|
||||
|
||||
# https://stackoverflow.com/a/27062830/1580227
|
||||
class AtomicCounter:
    """Increasing counter built on top of ``itertools.count``.

    The first call to :meth:`increment` returns 1.
    """

    def __init__(self):
        # Starting the sequence at 1 is the same as creating a zero-based
        # sequence and discarding its first value (logical initial value 0).
        self._counter = itertools.count(1)

    def increment(self, increment=1):
        """Advance the counter ``increment`` times and return the last value.

        ``increment`` must be a positive integer.
        """
        assert increment > 0
        drawn = []
        for _ in range(increment):
            drawn.append(next(self._counter))
        return drawn[-1]
|
||||
42
blockchainetl/csv_utils.py
Normal file
42
blockchainetl/csv_utils.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
# https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
|
||||
|
||||
import sys
|
||||
import csv
|
||||
|
||||
|
||||
def set_max_field_size_limit():
    """Raise the ``csv`` module's field size limit as high as it will accept.

    Starts from ``sys.maxsize`` and, each time ``csv.field_size_limit``
    rejects the value with ``OverflowError``, retries with a tenth of it.
    """
    candidate = sys.maxsize
    while True:
        try:
            csv.field_size_limit(candidate)
        except OverflowError:
            # Too large for this build: shrink by a factor of 10 and retry.
            candidate = int(candidate / 10)
        else:
            return
|
||||
220
blockchainetl/exporters.py
Normal file
220
blockchainetl/exporters.py
Normal file
@@ -0,0 +1,220 @@
|
||||
# Copyright (c) Scrapy developers.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of Scrapy nor the names of its contributors may be used
|
||||
# to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Item Exporters are used to export/serialize items into different formats.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import threading
|
||||
from json import JSONEncoder
|
||||
|
||||
import decimal
|
||||
import six
|
||||
|
||||
|
||||
class BaseItemExporter(object):
    """Base class holding the shared options and field-selection logic."""

    def __init__(self, **kwargs):
        self._configure(kwargs)

    def _configure(self, options, dont_fail=False):
        """Consume the known exporter options from the ``options`` dict.

        The recognised keys are popped from ``options`` in place. Unless
        ``dont_fail`` is true, any key left over afterwards raises
        ``TypeError`` (subclasses pass ``dont_fail=True`` so they can forward
        the remaining keyword arguments elsewhere).
        """
        self.encoding = options.pop('encoding', None)
        self.fields_to_export = options.pop('fields_to_export', None)
        self.export_empty_fields = options.pop('export_empty_fields', False)
        self.indent = options.pop('indent', None)
        if dont_fail or not options:
            return
        raise TypeError("Unexpected options: %s" % ', '.join(options.keys()))

    def export_item(self, item):
        """Export a single item; concrete exporters must override this."""
        raise NotImplementedError

    def serialize_field(self, field, name, value):
        """Apply the field's ``serializer`` to ``value`` (identity if absent)."""
        def passthrough(raw):
            return raw

        return field.get('serializer', passthrough)(value)

    def start_exporting(self):
        """Hook called before exporting starts; does nothing here."""
        pass

    def finish_exporting(self):
        """Hook called after exporting ends; does nothing here."""
        pass

    def _get_serialized_fields(self, item, default_value=None, include_empty=None):
        """Yield ``(name, serialized_value)`` pairs for the fields to export.

        ``include_empty`` falls back to ``self.export_empty_fields`` when it
        is ``None``. Names selected for export but absent from ``item`` are
        yielded with ``default_value``.
        """
        if include_empty is None:
            include_empty = self.export_empty_fields

        item_is_dict = isinstance(item, dict)

        if self.fields_to_export is not None:
            if include_empty:
                names = self.fields_to_export
            else:
                # Only the configured fields the item actually carries.
                names = (n for n in self.fields_to_export if n in item)
        elif include_empty and not item_is_dict:
            names = six.iterkeys(item.fields)
        else:
            names = six.iterkeys(item)

        for name in names:
            if name not in item:
                yield name, default_value
                continue
            # Plain dicts carry no per-field metadata.
            declared = {} if item_is_dict else item.fields[name]
            yield name, self.serialize_field(declared, name, item[name])
|
||||
|
||||
|
||||
class CsvItemExporter(BaseItemExporter):
    """Exporter that writes each item as one CSV row through ``csv.writer``."""

    def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
        # dont_fail=True: options not consumed here stay in kwargs and are
        # forwarded to csv.writer below.
        self._configure(kwargs, dont_fail=True)
        self.encoding = self.encoding or 'utf-8'
        self.include_headers_line = include_headers_line
        if six.PY3:
            self.stream = io.TextIOWrapper(
                file,
                line_buffering=False,
                write_through=True,
                encoding=self.encoding
            )
        else:
            self.stream = file
        self.csv_writer = csv.writer(self.stream, **kwargs)
        self._headers_not_written = True
        self._join_multivalued = join_multivalued
        self._write_headers_lock = threading.Lock()

    def serialize_field(self, field, name, value):
        """Apply the field's ``serializer``; default joins multi-valued fields."""
        return field.get('serializer', self._join_if_needed)(value)

    def _join_if_needed(self, value):
        """Join a list/tuple into one string; return anything else unchanged."""
        if not isinstance(value, (list, tuple)):
            return value

        def render(element):
            if not isinstance(element, dict):
                return str(element)
            # Separators without whitespace for compact format.
            return JSONEncoder(separators=(',', ':')).encode(element)

        try:
            return self._join_multivalued.join(map(render, value))
        except TypeError:  # list in value may not contain strings
            return value

    def export_item(self, item):
        """Write ``item`` as a row, emitting the header row first if pending."""
        # Check the flag, take the lock, then check again so the header step
        # runs once.
        if self._headers_not_written:
            with self._write_headers_lock:
                if self._headers_not_written:
                    self._write_headers_and_set_fields_to_export(item)
                    self._headers_not_written = False

        serialized = self._get_serialized_fields(
            item, default_value='', include_empty=True)
        row = list(self._build_row(cell for _, cell in serialized))
        self.csv_writer.writerow(row)

    def _build_row(self, values):
        """Yield each value as a native string, or as-is on ``TypeError``."""
        for raw in values:
            try:
                cell = to_native_str(raw, self.encoding)
            except TypeError:
                cell = raw
            yield cell

    def _write_headers_and_set_fields_to_export(self, item):
        """Write the header row, deriving the field list from ``item`` if unset."""
        if not self.include_headers_line:
            return
        if not self.fields_to_export:
            # Dicts: use the keys of the first item; otherwise use the fields
            # declared on the item.
            source = item if isinstance(item, dict) else item.fields
            self.fields_to_export = list(source.keys())
        header = list(self._build_row(self.fields_to_export))
        self.csv_writer.writerow(header)
|
||||
|
||||
def EncodeDecimal(o):
    """JSON ``default`` hook: turn a ``Decimal`` into a float rounded to 8 places.

    Raises ``TypeError`` for any other type.
    """
    if not isinstance(o, decimal.Decimal):
        raise TypeError(repr(o) + " is not JSON serializable")
    rounded = round(o, 8)
    return float(rounded)
|
||||
|
||||
class JsonLinesItemExporter(BaseItemExporter):
    """Exporter that writes each item as one JSON document per line."""

    def __init__(self, file, **kwargs):
        # dont_fail=True: options not consumed here are passed to JSONEncoder.
        self._configure(kwargs, dont_fail=True)
        self.file = file
        if 'ensure_ascii' not in kwargs:
            # Escape non-ASCII characters only when no encoding was configured.
            kwargs['ensure_ascii'] = not self.encoding
        self.encoder = JSONEncoder(default=EncodeDecimal, **kwargs)

    def export_item(self, item):
        """Encode ``item`` as JSON, append a newline and write it as bytes."""
        record = dict(self._get_serialized_fields(item))
        payload = to_bytes(self.encoder.encode(record) + '\n', self.encoding)
        self.file.write(payload)
|
||||
|
||||
|
||||
def to_native_str(text, encoding=None, errors='strict'):
    """Return ``text`` as the interpreter's native ``str`` type
    (bytes on Python 2.x, unicode on Python 3.x)."""
    convert = to_bytes if six.PY2 else to_unicode
    return convert(text, encoding, errors)
|
||||
|
||||
|
||||
def to_bytes(text, encoding=None, errors='strict'):
    """Return ``text`` encoded to bytes; bytes input is returned unchanged.

    ``encoding`` defaults to UTF-8. Raises ``TypeError`` when ``text`` is
    neither bytes nor a string.
    """
    if isinstance(text, bytes):
        return text
    if isinstance(text, six.string_types):
        return text.encode('utf-8' if encoding is None else encoding, errors)
    raise TypeError('to_bytes must receive a unicode, str or bytes '
                    'object, got %s' % type(text).__name__)
|
||||
|
||||
|
||||
def to_unicode(text, encoding=None, errors='strict'):
    """Return ``text`` decoded to unicode; unicode input is returned unchanged.

    ``encoding`` defaults to UTF-8. Raises ``TypeError`` when ``text`` is
    neither bytes nor unicode.
    """
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, bytes):
        return text.decode('utf-8' if encoding is None else encoding, errors)
    raise TypeError('to_unicode must receive a bytes, str or unicode '
                    'object, got %s' % type(text).__name__)
|
||||
0
blockchainetl/jobs/__init__.py
Normal file
0
blockchainetl/jobs/__init__.py
Normal file
0
blockchainetl/jobs/exporters/__init__.py
Normal file
0
blockchainetl/jobs/exporters/__init__.py
Normal file
@@ -21,26 +21,29 @@
|
||||
# SOFTWARE.
|
||||
import logging
|
||||
|
||||
from ethereumetl.atomic_counter import AtomicCounter
|
||||
from ethereumetl.exporters import CsvItemExporter, JsonLinesItemExporter
|
||||
from ethereumetl.file_utils import get_file_handle, close_silently
|
||||
from blockchainetl.atomic_counter import AtomicCounter
|
||||
from blockchainetl.exporters import CsvItemExporter, JsonLinesItemExporter
|
||||
from blockchainetl.file_utils import get_file_handle, close_silently
|
||||
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
|
||||
|
||||
|
||||
class CompositeItemExporter:
|
||||
def __init__(self, filename_mapping, field_mapping):
|
||||
def __init__(self, filename_mapping, field_mapping=None, converters=()):
|
||||
self.filename_mapping = filename_mapping
|
||||
self.field_mapping = field_mapping
|
||||
self.field_mapping = field_mapping or {}
|
||||
|
||||
self.file_mapping = {}
|
||||
self.exporter_mapping = {}
|
||||
self.counter_mapping = {}
|
||||
|
||||
self.converter = CompositeItemConverter(converters)
|
||||
|
||||
self.logger = logging.getLogger('CompositeItemExporter')
|
||||
|
||||
def open(self):
|
||||
for item_type, filename in self.filename_mapping.items():
|
||||
file = get_file_handle(filename, binary=True)
|
||||
fields = self.field_mapping[item_type]
|
||||
fields = self.field_mapping.get(item_type)
|
||||
self.file_mapping[item_type] = file
|
||||
if str(filename).endswith('.json'):
|
||||
item_exporter = JsonLinesItemExporter(file, fields_to_export=fields)
|
||||
@@ -50,17 +53,21 @@ class CompositeItemExporter:
|
||||
|
||||
self.counter_mapping[item_type] = AtomicCounter()
|
||||
|
||||
def export_item(self, item):
|
||||
item_type = item.get('type', None)
|
||||
if item_type is None:
|
||||
raise ValueError('type key is not found in item {}'.format(repr(item)))
|
||||
def export_items(self, items):
|
||||
for item in items:
|
||||
self.export_item(item)
|
||||
|
||||
exporter = self.exporter_mapping[item_type]
|
||||
def export_item(self, item):
|
||||
item_type = item.get('type')
|
||||
if item_type is None:
|
||||
raise ValueError('"type" key is not found in item {}'.format(repr(item)))
|
||||
|
||||
exporter = self.exporter_mapping.get(item_type)
|
||||
if exporter is None:
|
||||
raise ValueError('Exporter for item type {} not found'.format(item_type))
|
||||
exporter.export_item(item)
|
||||
exporter.export_item(self.converter.convert_item(item))
|
||||
|
||||
counter = self.counter_mapping[item_type]
|
||||
counter = self.counter_mapping.get(item_type)
|
||||
if counter is not None:
|
||||
counter.increment()
|
||||
|
||||
@@ -20,21 +20,19 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
|
||||
parser = argparse.ArgumentParser(description='Extracts a single field from a given file.')
|
||||
parser.add_argument('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
parser.add_argument('-f', '--field', required=True, type=str, help='The field name to extract.')
|
||||
class ConsoleItemExporter:
|
||||
def open(self):
|
||||
pass
|
||||
|
||||
args = parser.parse_args()
|
||||
def export_items(self, items):
|
||||
for item in items:
|
||||
self.export_item(item)
|
||||
|
||||
# TODO: Add support for CSV
|
||||
with smart_open(args.input, 'r') as input_file, smart_open(args.output, 'w') as output_file:
|
||||
for line in input_file:
|
||||
item = json.loads(line)
|
||||
output_file.write(item[args.field] + '\n')
|
||||
def export_item(self, item):
|
||||
print(json.dumps(item))
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
0
blockchainetl/jobs/exporters/converters/__init__.py
Normal file
0
blockchainetl/jobs/exporters/converters/__init__.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class CompositeItemConverter:
    """Chain several item converters and apply them in order."""

    def __init__(self, converters=()):
        # Iterable of objects exposing ``convert_item(item)``; may be None.
        self.converters = converters

    def convert_item(self, item):
        """Pass ``item`` through every converter and return the final result.

        When ``converters`` is None the item is returned untouched.
        """
        converted = item
        if self.converters is not None:
            for step in self.converters:
                # Each converter receives the output of the previous one.
                converted = step.convert_item(converted)
        return converted
|
||||
@@ -0,0 +1,47 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
#
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
|
||||
# Large ints are not handled correctly by pg8000 so we use Decimal instead:
|
||||
# https://github.com/mfenniak/pg8000/blob/412eace074514ada824e7a102765e37e2cda8eaa/pg8000/core.py#L1703
|
||||
class IntToDecimalItemConverter(SimpleItemConverter):
    """Item converter that replaces every ``int`` field value with a ``Decimal``."""

    def convert_field(self, key, value):
        """Return ``Decimal(value)`` for ints; any other value is returned as is.

        NOTE(review): ``bool`` is a subclass of ``int`` in Python, so boolean
        values are converted as well - confirm that this is intended.
        """
        return Decimal(value) if isinstance(value, int) else value
|
||||
@@ -0,0 +1,46 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
|
||||
|
||||
class IntToStringItemConverter(SimpleItemConverter):
    """Item converter that renders ``int`` field values as strings.

    When ``keys`` is given and non-empty, only those fields are converted;
    otherwise every int-valued field is converted.
    """

    def __init__(self, keys=None):
        # An empty or missing ``keys`` means "convert all fields".
        if keys:
            self.keys = set(keys)
        else:
            self.keys = None

    def convert_field(self, key, value):
        """Return ``str(value)`` for selected int fields, else ``value`` unchanged."""
        if not isinstance(value, int):
            return value
        if self.keys is not None and key not in self.keys:
            return value
        return str(value)
|
||||
@@ -0,0 +1,56 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
|
||||
class ListFieldItemConverter:
    """Flatten a list-valued field into numbered scalar fields.

    The list stored under ``field`` is replaced by the keys
    ``<new_field_prefix>0``, ``<new_field_prefix>1``, ... If the list is
    shorter than ``fill``, the remaining keys up to ``fill - 1`` are set to
    ``fill_with``.
    """

    def __init__(self, field, new_field_prefix, fill=0, fill_with=None):
        self.field = field
        self.new_field_prefix = new_field_prefix
        self.fill = fill
        self.fill_with = fill_with

    def convert_item(self, item):
        """Return a flattened copy of ``item``.

        The same object is returned when ``item`` is falsy or the field does
        not hold a list; the input is never mutated.
        """
        if not item:
            return item

        values = item.get(self.field)
        if not isinstance(values, list):
            return item

        prefix = self.new_field_prefix
        flattened = item.copy()
        del flattened[self.field]
        flattened.update({prefix + str(position): element for position, element in enumerate(values)})
        if len(values) < self.fill:
            # Pad with placeholder values so every item has the same columns.
            flattened.update({prefix + str(position): self.fill_with for position in range(len(values), self.fill)})
        return flattened
|
||||
@@ -0,0 +1,47 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# MIT License
|
||||
#
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
class SimpleItemConverter:
    """Base converter that maps every field of an item through ``convert_field``."""

    def __init__(self, field_converters=None):
        # Optional mapping of field name -> callable applied to that field's value.
        self.field_converters = field_converters

    def convert_item(self, item):
        """Return a new dict with each value of ``item`` run through ``convert_field``."""
        converted = {}
        for name, raw in item.items():
            converted[name] = self.convert_field(name, raw)
        return converted

    def convert_field(self, key, value):
        """Apply the converter registered for ``key``; otherwise return ``value`` unchanged."""
        registry = self.field_converters
        if registry is None or key not in registry:
            return value
        return registry[key](value)
|
||||
@@ -0,0 +1,41 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.simple_item_converter import SimpleItemConverter
|
||||
|
||||
|
||||
class UnixTimestampItemConverter(SimpleItemConverter):
    """Item converter that rewrites fields whose name ends with ``timestamp``."""

    def convert_field(self, key, value):
        """Run ``value`` through ``to_timestamp`` for ``*timestamp`` keys only."""
        is_timestamp_field = key is not None and key.endswith('timestamp')
        return to_timestamp(value) if is_timestamp_field else value
|
||||
|
||||
|
||||
def to_timestamp(value):
    """Format an integer Unix timestamp as a ``YYYY-MM-DD HH:MM:SS`` UTC string.

    Values that are not ``int`` are returned unchanged.
    """
    # Imported locally so this function does not depend on the module's import block.
    from datetime import timezone

    if not isinstance(value, int):
        return value
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime produces the same formatted string.
    return datetime.fromtimestamp(value, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
|
||||
111
blockchainetl/jobs/exporters/gcs_item_exporter.py
Normal file
111
blockchainetl/jobs/exporters/gcs_item_exporter.py
Normal file
@@ -0,0 +1,111 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
|
||||
from google.cloud import storage
|
||||
|
||||
|
||||
def build_block_bundles(items):
    """Group exported items into one bundle per block, ordered by block number.

    Items are dicts carrying a ``type`` key. Blocks are keyed by ``number``;
    transactions, logs, token transfers and traces by ``block_number``. Items
    of any other type are skipped with an info log entry.

    Returns a list of dicts with the keys ``block``, ``transactions``,
    ``logs``, ``token_transfers`` and ``traces``.

    Raises ValueError when a block number has more than one block item.
    """
    blocks = defaultdict(list)
    transactions = defaultdict(list)
    logs = defaultdict(list)
    token_transfers = defaultdict(list)
    traces = defaultdict(list)

    # (item type, destination bucket, field holding the block number)
    routes = (
        ('block', blocks, 'number'),
        ('transaction', transactions, 'block_number'),
        ('log', logs, 'block_number'),
        ('token_transfer', token_transfers, 'block_number'),
        ('trace', traces, 'block_number'),
    )

    for item in items:
        item_type = item.get('type')
        for type_name, bucket, number_field in routes:
            if item_type == type_name:
                bucket[item.get(number_field)].append(item)
                break
        else:
            logging.info(f'Skipping item with type {item_type}')

    block_bundles = []
    for block_number in sorted(blocks.keys()):
        if len(blocks[block_number]) != 1:
            raise ValueError(f'There must be a single block for a given block number, was {len(blocks[block_number])} for block number {block_number}')
        bundle = {
            'block': blocks[block_number][0],
            'transactions': transactions[block_number],
            'logs': logs[block_number],
            'token_transfers': token_transfers[block_number],
            'traces': traces[block_number],
        }
        block_bundles.append(bundle)

    return block_bundles
|
||||
|
||||
|
||||
class GcsItemExporter:
    """Item exporter that uploads one JSON blob per block bundle to a GCS bucket.

    Blobs are written to ``<path>/<block number>.json`` in ``bucket``.
    """

    def __init__(
            self,
            bucket,
            path='blocks',
            build_block_bundles_func=build_block_bundles):
        self.bucket = bucket
        self.path = normalize_path(path)
        # Callable turning a batch of items into a sequence of block bundles.
        self.build_block_bundles_func = build_block_bundles_func
        self.storage_client = storage.Client()

    def open(self):
        """No-op; the storage client is created in ``__init__``."""
        pass

    def export_items(self, items):
        """Bundle ``items`` per block and upload every bundle as a JSON blob.

        Raises ValueError when a bundle lacks ``block`` or ``block.number``.
        """
        for block_bundle in self.build_block_bundles_func(items):
            destination_blob_name = self._blob_name_for(block_bundle)

            blob = self.storage_client.bucket(self.bucket).blob(destination_blob_name)
            blob.upload_from_string(json.dumps(block_bundle))
            logging.info(f'Uploaded file gs://{self.bucket}/{destination_blob_name}')

    def _blob_name_for(self, block_bundle):
        """Validate ``block_bundle`` and return the blob name it is stored under."""
        block = block_bundle.get('block')
        if block is None:
            raise ValueError('block_bundle must include the block field')
        block_number = block.get('number')
        if block_number is None:
            raise ValueError('block_bundle must include the block.number field')
        return f'{self.path}/{block_number}.json'

    def close(self):
        """No-op."""
        pass
|
||||
|
||||
|
||||
def normalize_path(p):
    """Return ``p`` without one leading and one trailing ``/``.

    ``None`` is treated as the empty string. Only a single slash is removed
    from each end.
    """
    path = '' if p is None else p
    if path.startswith('/'):
        path = path[1:]
    if path.endswith('/'):
        path = path[:-1]
    return path
|
||||
105
blockchainetl/jobs/exporters/google_pubsub_item_exporter.py
Normal file
105
blockchainetl/jobs/exporters/google_pubsub_item_exporter.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from google.cloud import pubsub_v1
|
||||
from timeout_decorator import timeout_decorator
|
||||
|
||||
|
||||
class GooglePubSubItemExporter:
    """Item exporter that publishes items as JSON messages to Google Pub/Sub topics.

    ``item_type_to_topic_mapping`` maps an item's ``type`` to a topic path.
    ``message_attributes`` lists item keys that are copied (as strings) into
    the message attributes when present on the item.
    """

    def __init__(self, item_type_to_topic_mapping, message_attributes=(),
                 batch_max_bytes=1024 * 5, batch_max_latency=1, batch_max_messages=1000,
                 enable_message_ordering=False):
        self.item_type_to_topic_mapping = item_type_to_topic_mapping

        self.batch_max_bytes = batch_max_bytes
        self.batch_max_latency = batch_max_latency
        self.batch_max_messages = batch_max_messages

        self.enable_message_ordering = enable_message_ordering

        self.publisher = self.create_publisher()

        self.message_attributes = message_attributes

    def open(self):
        """No-op; the publisher is created in ``__init__``."""
        pass

    def export_items(self, items):
        """Publish ``items`` and wait for all of them to be acknowledged.

        On timeout the publisher is recreated and the timeout error is
        re-raised to the caller.
        """
        try:
            self._export_items_with_timeout(items)
        except timeout_decorator.TimeoutError:
            # A bug in PubSub publisher that makes it stalled after running for some time.
            # Exception in thread Thread-CommitBatchPublisher:
            # details = "channel is in state TRANSIENT_FAILURE"
            # https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606
            logging.info('Recreating Pub/Sub publisher.')
            self.publisher = self.create_publisher()
            # Bare raise keeps the original traceback.
            raise

    @timeout_decorator.timeout(300)
    def _export_items_with_timeout(self, items):
        """Publish every item, then block until each publish future resolves."""
        futures = []
        for item in items:
            message_future = self.export_item(item)
            # export_item returns None for item types without a configured
            # topic; those were only logged and there is nothing to wait for.
            if message_future is not None:
                futures.append(message_future)

        for future in futures:
            # result() blocks until the message is published.
            future.result()

    def export_item(self, item):
        """Publish one item and return the publish future.

        Returns None (after logging a warning) when the item's type has no
        configured topic.
        """
        item_type = item.get('type')
        if item_type is not None and item_type in self.item_type_to_topic_mapping:
            topic_path = self.item_type_to_topic_mapping.get(item_type)
            data = json.dumps(item).encode('utf-8')

            # A single ordering key is used for all messages when ordering is enabled.
            ordering_key = 'all' if self.enable_message_ordering else ''
            message_future = self.publisher.publish(topic_path, data=data, ordering_key=ordering_key, **self.get_message_attributes(item))
            return message_future
        else:
            logging.warning('Topic for item type "{}" is not configured.'.format(item_type))
            return None

    def get_message_attributes(self, item):
        """Return the configured attributes that are present (not None) on ``item``."""
        attributes = {}

        for attr_name in self.message_attributes:
            if item.get(attr_name) is not None:
                attributes[attr_name] = str(item.get(attr_name))

        return attributes

    def create_publisher(self):
        """Build a publisher client from the stored batch and ordering settings."""
        batch_settings = pubsub_v1.types.BatchSettings(
            max_bytes=self.batch_max_bytes,
            max_latency=self.batch_max_latency,
            max_messages=self.batch_max_messages,
        )

        publisher_options = pubsub_v1.types.PublisherOptions(enable_message_ordering=self.enable_message_ordering)
        return pubsub_v1.PublisherClient(batch_settings=batch_settings, publisher_options=publisher_options)

    def close(self):
        """No-op."""
        pass
|
||||
44
blockchainetl/jobs/exporters/in_memory_item_exporter.py
Normal file
44
blockchainetl/jobs/exporters/in_memory_item_exporter.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class InMemoryItemExporter:
    """Item exporter that collects items in per-type in-memory lists."""

    def __init__(self, item_types):
        self.item_types = item_types
        # item type -> list of exported items; populated by open().
        self.items = {}

    def open(self):
        """Create (or reset) an empty list for every configured item type."""
        self.items.update({item_type: [] for item_type in self.item_types})

    def export_item(self, item):
        """Append ``item`` to the list for its ``type``.

        Raises ValueError when the item has no ``type``. A type that has no
        list yet raises KeyError from the dict lookup.
        """
        item_type = item.get('type')
        if item_type is None:
            raise ValueError('type key is not found in item {}'.format(repr(item)))

        collected = self.items[item_type]
        collected.append(item)

    def close(self):
        """No-op."""
        pass

    def get_items(self, item_type):
        """Return the list of items collected for ``item_type``."""
        return self.items[item_type]
|
||||
54
blockchainetl/jobs/exporters/kafka_exporter.py
Normal file
54
blockchainetl/jobs/exporters/kafka_exporter.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
|
||||
from kafka import KafkaProducer
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
|
||||
|
||||
|
||||
class KafkaItemExporter:
    """Item exporter that sends items as JSON messages to Kafka topics.

    ``output`` has the form ``kafka/<host>:<port>``;
    ``item_type_to_topic_mapping`` maps an item's ``type`` to a topic name.
    """

    def __init__(self, output, item_type_to_topic_mapping, converters=()):
        self.item_type_to_topic_mapping = item_type_to_topic_mapping
        # NOTE(review): the converter is only used by convert_items(), which
        # nothing in this class calls - export_items() sends items
        # unconverted. Confirm whether that is intended.
        self.converter = CompositeItemConverter(converters)
        self.connection_url = self.get_connection_url(output)
        print(self.connection_url)
        self.producer = KafkaProducer(bootstrap_servers=self.connection_url)

    def get_connection_url(self, output):
        """Return the part of ``output`` after the first ``/`` segment.

        Raises Exception when ``output`` contains no ``/``.
        """
        try:
            return output.split('/')[1]
        except IndexError:
            # Indexing a too-short list raises IndexError, not KeyError.
            raise Exception('Invalid kafka output param, It should be in format of "kafka/127.0.0.1:9092"')

    def open(self):
        """No-op; the producer is created in ``__init__``."""
        pass

    def export_items(self, items):
        """Send every item in ``items`` via ``export_item``."""
        for item in items:
            self.export_item(item)

    def export_item(self, item):
        """Send one item to the topic configured for its type.

        Returns whatever ``producer.send`` returns, or None (after logging a
        warning) when the item's type has no configured topic.
        """
        item_type = item.get('type')
        if item_type is not None and item_type in self.item_type_to_topic_mapping:
            data = json.dumps(item).encode('utf-8')
            logging.debug(data)
            return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data)
        else:
            logging.warning('Topic for item type "{}" is not configured.'.format(item_type))

    def convert_items(self, items):
        """Lazily yield each item passed through the configured converters."""
        for item in items:
            yield self.converter.convert_item(item)

    def close(self):
        """No-op."""
        pass
|
||||
|
||||
|
||||
def group_by_item_type(items):
    """Group ``items`` by their ``type`` value.

    Returns a ``defaultdict(list)`` mapping each type (None for items without
    a ``type`` key) to its items in input order.
    """
    grouped = collections.defaultdict(list)
    for entry in items:
        bucket = grouped[entry.get('type')]
        bucket.append(entry)
    return grouped
|
||||
82
blockchainetl/jobs/exporters/kinesis_item_exporter.py
Normal file
82
blockchainetl/jobs/exporters/kinesis_item_exporter.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2022 CoinStats LLC
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
import typing as t
|
||||
import uuid
|
||||
from itertools import zip_longest
|
||||
|
||||
import boto3
|
||||
|
||||
_KINESIS_BATCH_LIMIT = 500
|
||||
|
||||
|
||||
def _uuid_partition_key(_: dict) -> str:
    """Return a fresh random UUID4 hex string; the item argument is ignored."""
    fresh_id = uuid.uuid4()
    return fresh_id.hex
|
||||
|
||||
|
||||
class KinesisItemExporter:
    """Item exporter that writes items as JSON records to a Kinesis stream.

    ``partition_key_callable`` receives an item and returns the partition key
    for its record; by default a random UUID hex string is used.
    """

    def __init__(
        self,
        stream_name: str,
        partition_key_callable: t.Callable[[dict], str] = _uuid_partition_key,
    ):
        # NOTE(review): this function-scope import binds a local that is never
        # used here; open() relies on the module-level boto3 import.
        import boto3
        self._stream_name = stream_name
        self._partition_key_callable = partition_key_callable
        self._kinesis_client = None  # initialized in .open

    def open(self) -> None:
        """Create the Kinesis client."""
        self._kinesis_client = boto3.client('kinesis')

    def export_items(self, items: t.Iterable[dict]) -> None:
        """Send ``items`` in ``put_records`` batches of at most _KINESIS_BATCH_LIMIT.

        NOTE(review): the ``put_records`` response is not inspected here.
        """
        padding = object()
        source = iter(items)
        # Repeating the same iterator makes zip_longest pull consecutive items
        # into each tuple; the last tuple is padded with the sentinel.
        batches = zip_longest(*([source] * _KINESIS_BATCH_LIMIT), fillvalue=padding)
        for batch in batches:
            records = []
            for item in batch:
                if item is padding:
                    continue
                records.append({
                    'Data': _serialize_item(item),
                    'PartitionKey': self._partition_key_callable(item),
                })
            self._kinesis_client.put_records(
                StreamName=self._stream_name,
                Records=records,
            )

    def export_item(self, item: dict) -> None:
        """Send a single item with ``put_record``."""
        payload = _serialize_item(item)
        partition_key = self._partition_key_callable(item)
        self._kinesis_client.put_record(
            StreamName=self._stream_name,
            Data=payload,
            PartitionKey=partition_key,
        )

    def close(self):
        """No-op."""
        pass
|
||||
|
||||
|
||||
def _serialize_item(item: dict) -> bytes:
    """Return ``item`` serialized as JSON and encoded to bytes."""
    text = json.dumps(item)
    return text.encode()
|
||||
42
blockchainetl/jobs/exporters/multi_item_exporter.py
Normal file
42
blockchainetl/jobs/exporters/multi_item_exporter.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class MultiItemExporter:
    """Item exporter that fans every call out to a list of item exporters."""

    def __init__(self, item_exporters):
        self.item_exporters = item_exporters

    def open(self):
        """Open every wrapped exporter, in order."""
        for exporter in self.item_exporters:
            exporter.open()

    def export_items(self, items):
        """Pass the same batch of items to every wrapped exporter."""
        # A one-shot iterator would be drained by the first exporter and leave
        # the others with nothing, so materialize it once. Lists and tuples
        # are passed through as they are.
        if not isinstance(items, (list, tuple)):
            items = list(items)
        for exporter in self.item_exporters:
            exporter.export_items(items)

    def export_item(self, item):
        """Pass a single item to every wrapped exporter."""
        for exporter in self.item_exporters:
            exporter.export_item(item)

    def close(self):
        """Close every wrapped exporter, in order."""
        for exporter in self.item_exporters:
            exporter.close()
|
||||
70
blockchainetl/jobs/exporters/postgres_item_exporter.py
Normal file
70
blockchainetl/jobs/exporters/postgres_item_exporter.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2020 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import collections
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter
|
||||
|
||||
|
||||
class PostgresItemExporter:
    """Item exporter that writes items to a database through SQLAlchemy.

    Items are grouped by their ``type`` and each group is written with the
    insert statement registered for that type.
    """

    def __init__(self, connection_url, item_type_to_insert_stmt_mapping, converters=(), print_sql=True):
        """Create the exporter and its SQLAlchemy engine.

        :param connection_url: URL passed to ``sqlalchemy.create_engine``.
        :param item_type_to_insert_stmt_mapping: mapping of item type to the
            insert statement executed for items of that type.
        :param converters: converters wrapped in a ``CompositeItemConverter``
            and applied to every item before it is written.
        :param print_sql: passed to the engine as ``echo``.
        """
        self.connection_url = connection_url
        self.item_type_to_insert_stmt_mapping = item_type_to_insert_stmt_mapping
        self.converter = CompositeItemConverter(converters)
        self.print_sql = print_sql

        self.engine = self.create_engine()

    def open(self):
        """Do nothing; the engine is already created in ``__init__``."""
        pass

    def export_items(self, items):
        """Write ``items`` using the insert statement registered for each type.

        Items whose type has no registered insert statement are skipped.
        """
        items_grouped_by_type = group_by_item_type(items)

        for item_type, insert_stmt in self.item_type_to_insert_stmt_mapping.items():
            item_group = items_grouped_by_type.get(item_type)
            if not item_group:
                continue

            converted_items = list(self.convert_items(item_group))
            # The context manager closes the connection even when the insert
            # fails, so it is never left checked out of the engine.
            with self.engine.connect() as connection:
                connection.execute(insert_stmt, converted_items)

    def convert_items(self, items):
        """Lazily yield each item after running it through the converter."""
        for item in items:
            yield self.converter.convert_item(item)

    def create_engine(self):
        """Build the SQLAlchemy engine for ``self.connection_url``."""
        engine = create_engine(self.connection_url, echo=self.print_sql, pool_recycle=3600)
        return engine

    def close(self):
        """Do nothing; connections are closed after every insert."""
        pass
|
||||
|
||||
|
||||
def group_by_item_type(items):
    """Group items by the value of their ``'type'`` key.

    :param items: iterable of objects exposing ``get`` (e.g. dicts).
    :return: ``defaultdict(list)`` mapping each type to its items in their
        original order; items without a ``'type'`` are grouped under ``None``.
    """
    grouped = collections.defaultdict(list)
    for entry in items:
        item_type = entry.get('type')
        grouped[item_type].append(entry)
    return grouped
|
||||
11
blockchainetl/logging_utils.py
Normal file
11
blockchainetl/logging_utils.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import logging
|
||||
|
||||
|
||||
def logging_basic_config(filename=None):
    """Configure root logging at INFO level with the project's log format.

    :param filename: when not ``None``, passed to ``logging.basicConfig`` as
        ``filename``; otherwise no ``filename`` argument is passed.
    """
    log_format = '%(asctime)s - %(name)s [%(levelname)s] - %(message)s'

    config = {'level': logging.INFO, 'format': log_format}
    if filename is not None:
        config['filename'] = filename
    logging.basicConfig(**config)

    # Only let errors through from this logger.
    logging.getLogger('ethereum_dasm.evmdasm').setLevel(logging.ERROR)
|
||||
23
blockchainetl/streaming/__init__.py
Normal file
23
blockchainetl/streaming/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
16
blockchainetl/streaming/postgres_utils.py
Normal file
16
blockchainetl/streaming/postgres_utils.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
|
||||
|
||||
def create_insert_statement_for_table(table):
    """Build a PostgreSQL insert statement for ``table`` that upserts on the primary key.

    :param table: table object exposing ``columns``, each column having
        ``name`` and ``primary_key``.
    :return: a plain insert when the table has no primary key; otherwise an
        insert with ``ON CONFLICT`` on the primary key columns that overwrites
        every non-key column with the incoming value.
    """
    insert_stmt = insert(table)

    primary_key_fields = [column.name for column in table.columns if column.primary_key]
    if not primary_key_fields:
        return insert_stmt

    update_values = {
        column.name: insert_stmt.excluded[column.name]
        for column in table.columns
        if not column.primary_key
    }
    if not update_values:
        # Every column belongs to the primary key, so there is nothing to
        # update. DO UPDATE rejects an empty ``set_``, so fall back to
        # DO NOTHING.
        return insert_stmt.on_conflict_do_nothing(index_elements=primary_key_fields)

    return insert_stmt.on_conflict_do_update(
        index_elements=primary_key_fields,
        set_=update_values
    )
|
||||
139
blockchainetl/streaming/streamer.py
Normal file
139
blockchainetl/streaming/streamer.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
from blockchainetl.streaming.streamer_adapter_stub import StreamerAdapterStub
|
||||
from blockchainetl.file_utils import smart_open
|
||||
|
||||
|
||||
class Streamer:
    """Repeatedly exports new blocks through a streamer adapter.

    Progress is tracked as the last synced block number, which is persisted
    in ``last_synced_block_file`` after every exported batch.
    """

    def __init__(
            self,
            blockchain_streamer_adapter=StreamerAdapterStub(),
            last_synced_block_file='last_synced_block.txt',
            lag=0,
            start_block=None,
            end_block=None,
            period_seconds=10,
            block_batch_size=10,
            retry_errors=True,
            pid_file=None):
        """Store the settings and load (or initialise) the last synced block.

        :param blockchain_streamer_adapter: object providing ``open``,
            ``get_current_block_number``, ``export_all`` and ``close``.
        :param last_synced_block_file: file holding the last synced block.
        :param lag: number of blocks to stay behind the current block.
        :param start_block: first block to export; when given, the last synced
            block file is initialised to ``start_block - 1``.
        :param end_block: last block to export, or ``None`` for no limit.
        :param period_seconds: sleep time when a cycle synced nothing.
        :param block_batch_size: maximum number of blocks per sync cycle.
        :param retry_errors: when false, sync errors are re-raised.
        :param pid_file: optional file the process id is written to while
            streaming.
        """
        self.blockchain_streamer_adapter = blockchain_streamer_adapter
        self.last_synced_block_file = last_synced_block_file
        self.lag = lag
        self.start_block = start_block
        self.end_block = end_block
        self.period_seconds = period_seconds
        self.block_batch_size = block_batch_size
        self.retry_errors = retry_errors
        self.pid_file = pid_file

        must_initialise = self.start_block is not None or not os.path.isfile(self.last_synced_block_file)
        if must_initialise:
            # The stored value is the block *before* the first one to export.
            initial_block = (self.start_block or 0) - 1
            init_last_synced_block_file(initial_block, self.last_synced_block_file)

        self.last_synced_block = read_last_synced_block(self.last_synced_block_file)

    def stream(self):
        """Run the streaming loop, managing the pid file and the adapter.

        The adapter is closed and the pid file deleted even when streaming
        fails.
        """
        try:
            if self.pid_file is not None:
                logging.info('Creating pid file {}'.format(self.pid_file))
                pid = str(os.getpid())
                write_to_file(self.pid_file, pid)
            self.blockchain_streamer_adapter.open()
            self._do_stream()
        finally:
            self.blockchain_streamer_adapter.close()
            if self.pid_file is not None:
                logging.info('Deleting pid file {}'.format(self.pid_file))
                delete_file(self.pid_file)

    def _do_stream(self):
        """Run sync cycles until ``end_block`` is reached (forever if unset)."""
        while self.end_block is None or self.last_synced_block < self.end_block:
            try:
                synced_blocks = self._sync_cycle()
            except Exception:
                # https://stackoverflow.com/a/4992124/1580227
                logging.exception('An exception occurred while syncing block data.')
                if not self.retry_errors:
                    raise
                # A failed cycle counts as "nothing synced" so that we back off.
                synced_blocks = 0

            if synced_blocks <= 0:
                logging.info('Nothing to sync. Sleeping for {} seconds...'.format(self.period_seconds))
                time.sleep(self.period_seconds)

    def _sync_cycle(self):
        """Export one batch of blocks and persist the new last synced block.

        :return: number of blocks exported in this cycle (0 when up to date).
        """
        current_block = self.blockchain_streamer_adapter.get_current_block_number()

        target_block = self._calculate_target_block(current_block, self.last_synced_block)
        blocks_to_sync = max(target_block - self.last_synced_block, 0)

        logging.info('Current block {}, target block {}, last synced block {}, blocks to sync {}'.format(
            current_block, target_block, self.last_synced_block, blocks_to_sync))

        if blocks_to_sync == 0:
            return blocks_to_sync

        first_block = self.last_synced_block + 1
        self.blockchain_streamer_adapter.export_all(first_block, target_block)
        logging.info('Writing last synced block {}'.format(target_block))
        write_last_synced_block(self.last_synced_block_file, target_block)
        self.last_synced_block = target_block

        return blocks_to_sync

    def _calculate_target_block(self, current_block, last_synced_block):
        """Return the highest block this cycle may sync up to.

        The target is limited by the lag behind ``current_block``, by the
        batch size, and by ``end_block`` when one is set.
        """
        candidates = [
            current_block - self.lag,
            last_synced_block + self.block_batch_size,
        ]
        if self.end_block is not None:
            candidates.append(self.end_block)
        return min(candidates)
|
||||
|
||||
|
||||
def delete_file(file):
    """Remove ``file``, silently ignoring any ``OSError`` (e.g. missing file)."""
    try:
        os.remove(file)
    except OSError:
        # Best-effort cleanup: failing to delete is not an error here.
        return
|
||||
|
||||
|
||||
def write_last_synced_block(file, last_synced_block):
    """Write ``last_synced_block`` to ``file`` followed by a newline."""
    content = str(last_synced_block) + '\n'
    write_to_file(file, content)
|
||||
|
||||
|
||||
def init_last_synced_block_file(start_block, last_synced_block_file):
    """Create the last synced block file with ``start_block`` as its content.

    :raises ValueError: if ``last_synced_block_file`` already exists, so an
        existing state file is never overwritten.
    """
    already_exists = os.path.isfile(last_synced_block_file)
    if already_exists:
        message = (
            '{} should not exist if --start-block option is specified. '
            'Either remove the {} file or the --start-block option.'
        ).format(last_synced_block_file, last_synced_block_file)
        raise ValueError(message)

    write_last_synced_block(last_synced_block_file, start_block)
|
||||
|
||||
|
||||
def read_last_synced_block(file):
    """Read ``file`` and return its whole content parsed as an ``int``."""
    with smart_open(file, 'r') as handle:
        raw_value = handle.read()
    return int(raw_value)
|
||||
|
||||
|
||||
def write_to_file(file, content):
    """Open ``file`` in ``'w'`` mode via ``smart_open`` and write ``content``."""
    with smart_open(file, 'w') as output:
        output.write(content)
|
||||
13
blockchainetl/streaming/streamer_adapter_stub.py
Normal file
13
blockchainetl/streaming/streamer_adapter_stub.py
Normal file
@@ -0,0 +1,13 @@
|
||||
class StreamerAdapterStub:
    """Streamer adapter whose methods do nothing.

    It reports block ``0`` as the current block and exports nothing.
    """

    def open(self):
        """Do nothing."""
        return None

    def get_current_block_number(self):
        """Always report block ``0`` as the current block."""
        return 0

    def export_all(self, start_block, end_block):
        """Ignore the requested block range."""
        return None

    def close(self):
        """Do nothing."""
        return None
|
||||
19
blockchainetl/streaming/streaming_utils.py
Normal file
19
blockchainetl/streaming/streaming_utils.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
|
||||
def configure_signals():
    """Install a SIGTERM handler that exits the process with status 0."""

    def exit_on_sigterm(_signal_number, _frame):
        # sys.exit raises SystemExit(0) instead of terminating abruptly.
        sys.exit(0)

    signal.signal(signal.SIGTERM, exit_on_sigterm)
|
||||
|
||||
|
||||
def configure_logging(filename):
    """Remove all root logger handlers and re-run the basic logging config.

    :param filename: forwarded to ``logging_basic_config`` as ``filename``.
    """
    root_logger = logging.root
    # Iterate over a copy because removeHandler mutates the handler list.
    for existing_handler in list(root_logger.handlers):
        root_logger.removeHandler(existing_handler)
    logging_basic_config(filename=filename)
|
||||
42
docs/amazon-athena.md
Normal file
42
docs/amazon-athena.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Amazon Athena
|
||||
|
||||
## Querying in Amazon Athena
|
||||
|
||||
- Upload the files to S3:
|
||||
|
||||
```bash
|
||||
> cd output
|
||||
> aws s3 sync . s3://<your_bucket>/ethereumetl/export --region ap-southeast-1
|
||||
```
|
||||
|
||||
- Sign in to Athena https://console.aws.amazon.com/athena/home
|
||||
|
||||
- Create a database:
|
||||
|
||||
```sql
|
||||
CREATE DATABASE ethereumetl;
|
||||
```
|
||||
|
||||
- Create the tables:
|
||||
- blocks: [schemas/aws/blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/blocks.sql)
|
||||
- transactions: [schemas/aws/transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/transactions.sql)
|
||||
- token_transfers: [schemas/aws/token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/token_transfers.sql)
|
||||
- contracts: [schemas/aws/contracts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/contracts.sql)
|
||||
- receipts: [schemas/aws/receipts.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/receipts.sql)
|
||||
- logs: [schemas/aws/logs.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/logs.sql)
|
||||
- tokens: [schemas/aws/tokens.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/tokens.sql)
|
||||
|
||||
## Airflow DAGs
|
||||
|
||||
Refer to https://github.com/medvedev1088/ethereum-etl-airflow for the instructions.
|
||||
|
||||
## Tables for Parquet Files
|
||||
|
||||
Read [this article](https://medium.com/@medvedev1088/converting-ethereum-etl-files-to-parquet-399e048ddd30) on how to convert CSVs to Parquet.
|
||||
|
||||
- Create the tables:
|
||||
- parquet_blocks: [schemas/aws/parquet/parquet_blocks.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_blocks.sql)
|
||||
- parquet_transactions: [schemas/aws/parquet/parquet_transactions.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_transactions.sql)
|
||||
- parquet_token_transfers: [schemas/aws/parquet/parquet_token_transfers.sql](https://github.com/blockchain-etl/ethereum-etl/blob/master/schemas/aws/parquet/parquet_token_transfers.sql)
|
||||
|
||||
Note that [DECIMAL type is limited to 38 digits in Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-decimal), so values with more than 38 digits will be null.
|
||||
10
docs/citing.md
Normal file
10
docs/citing.md
Normal file
@@ -0,0 +1,10 @@
|
||||
## How to Cite
|
||||
|
||||
```
|
||||
@misc{ethereumetl,
|
||||
author = {Evgeny Medvedev and the D5 team},
|
||||
title = {Ethereum ETL},
|
||||
year = {2018},
|
||||
url = {https://github.com/blockchain-etl/ethereum-etl}
|
||||
}
|
||||
```
|
||||
246
docs/commands.md
Normal file
246
docs/commands.md
Normal file
@@ -0,0 +1,246 @@
|
||||
# Commands
|
||||
|
||||
All the commands accept `-h` parameter for help, e.g.:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions -h
|
||||
|
||||
Usage: ethereumetl export_blocks_and_transactions [OPTIONS]
|
||||
|
||||
Export blocks and transactions.
|
||||
|
||||
Options:
|
||||
-s, --start-block INTEGER Start block
|
||||
-e, --end-block INTEGER End block [required]
|
||||
-b, --batch-size INTEGER The number of blocks to export at a time.
|
||||
-p, --provider-uri TEXT The URI of the web3 provider e.g.
|
||||
file://$HOME/Library/Ethereum/geth.ipc or
|
||||
https://mainnet.infura.io
|
||||
-w, --max-workers INTEGER The maximum number of workers.
|
||||
--blocks-output TEXT The output file for blocks. If not provided
|
||||
blocks will not be exported. Use "-" for stdout
|
||||
--transactions-output TEXT The output file for transactions. If not
|
||||
provided transactions will not be exported. Use
|
||||
"-" for stdout
|
||||
-h, --help Show this message and exit.
|
||||
```
|
||||
|
||||
For the `--output` parameters the supported types are csv and json. The format type is inferred from the output file name.
|
||||
|
||||
#### export_blocks_and_transactions
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc \
|
||||
--blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Omit `--blocks-output` or `--transactions-output` options if you want to export only transactions/blocks.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Blocks and transactions schema](schema.md#blockscsv).
|
||||
|
||||
#### export_token_transfers
|
||||
|
||||
The API used in this command is not supported by Infura, so you will need a local node.
|
||||
If you want to use Infura for exporting ERC20 transfers refer to [extract_token_transfers](#extract_token_transfers)
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output token_transfers.csv
|
||||
```
|
||||
|
||||
Include `--tokens <token1> --tokens <token2>` to filter only certain tokens, e.g.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv \
|
||||
--tokens 0x1F573D6Fb3F13d689FF844B4cE37794d79a7FF1C --tokens 0x80fB784B7eD66730e8b1DBd9820aFD29931aab03
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Token transfers schema](schema.md#token_transferscsv).
|
||||
|
||||
#### export_receipts_and_logs
|
||||
|
||||
First extract transaction hashes from `transactions.csv`
|
||||
(Exported with [export_blocks_and_transactions](#export_blocks_and_transactions)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input transactions.csv --column hash --output transaction_hashes.txt
|
||||
```
|
||||
|
||||
Then export receipts and logs:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_receipts_and_logs --transaction-hashes transaction_hashes.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --receipts-output receipts.csv --logs-output logs.csv
|
||||
```
|
||||
|
||||
Omit `--receipts-output` or `--logs-output` options if you want to export only logs/receipts.
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
Upvote this feature request https://github.com/paritytech/parity/issues/9075,
|
||||
it will make receipts and logs export much faster.
|
||||
|
||||
[Receipts and logs schema](schema.md#receiptscsv).
|
||||
|
||||
#### extract_token_transfers
|
||||
|
||||
First export receipt logs with [export_receipts_and_logs](#export_receipts_and_logs).
|
||||
|
||||
Then extract transfers from the logs.csv file:
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_token_transfers --logs logs.csv --output token_transfers.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Token transfers schema](schema.md#token_transferscsv).
|
||||
|
||||
#### export_contracts
|
||||
|
||||
First extract contract addresses from `receipts.csv`
|
||||
(Exported with [export_receipts_and_logs](#export_receipts_and_logs)):
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_csv_column --input receipts.csv --column contract_address --output contract_addresses.txt
|
||||
```
|
||||
|
||||
Then export contracts:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_contracts --contract-addresses contract_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output contracts.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Contracts schema](schema.md#contractscsv).
|
||||
|
||||
#### export_tokens
|
||||
|
||||
First extract token addresses from `contracts.json`
|
||||
(Exported with [export_contracts](#export_contracts)):
|
||||
|
||||
```bash
|
||||
> ethereumetl filter_items -i contracts.json -p "item['is_erc20'] or item['is_erc721']" | \
|
||||
ethereumetl extract_field -f address -o token_addresses.txt
|
||||
```
|
||||
|
||||
Then export ERC20 / ERC721 tokens:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_tokens --token-addresses token_addresses.txt \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
|
||||
```
|
||||
|
||||
You can tune `--max-workers` for performance.
|
||||
|
||||
[Tokens schema](schema.md#tokenscsv).
|
||||
|
||||
#### export_traces
|
||||
|
||||
Also called internal transactions.
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Parity archive node (`parity --tracing on`).
|
||||
Make sure your node has at least 8GB of memory, or else you will face timeout errors.
|
||||
See [this issue](https://github.com/blockchain-etl/ethereum-etl/issues/137)
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --batch-size 100 --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
[Traces schema](schema.md#tracescsv).
|
||||
|
||||
#### export_geth_traces
|
||||
|
||||
Read [Differences between geth and parity traces.csv](schema.md#differences-between-geth-and-parity-tracescsv)
|
||||
|
||||
The API used in this command is not supported by Infura,
|
||||
so you will need a local Geth archive node (`geth --gcmode archive --syncmode full --txlookuplimit 0`).
|
||||
When using rpc, add `--rpc --rpcapi debug` options.
|
||||
|
||||
```bash
|
||||
> ethereumetl export_geth_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --batch-size 100 --output geth_traces.json
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### extract_geth_traces
|
||||
|
||||
```bash
|
||||
> ethereumetl extract_geth_traces --input geth_traces.json --output traces.csv
|
||||
```
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
#### get_block_range_for_date
|
||||
|
||||
```bash
|
||||
> ethereumetl get_block_range_for_date --provider-uri=https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --date 2018-01-01
|
||||
4832686,4838611
|
||||
```
|
||||
|
||||
#### get_keccak_hash
|
||||
|
||||
```bash
|
||||
> ethereumetl get_keccak_hash -i "transfer(address,uint256)"
|
||||
0xa9059cbb2ab09eb219583f4a59a5d0623ade346d962bcd4e46b11da047c9049b
|
||||
```
|
||||
|
||||
#### stream
|
||||
|
||||
```bash
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --start-block 500000
|
||||
```
|
||||
|
||||
- This command outputs blocks, transactions, logs, token_transfers to the console by default.
|
||||
- Entity types can be specified with the `-e` option,
|
||||
e.g. `-e block,transaction,log,token_transfer,trace,contract,token`.
|
||||
- Use the `--output` option to specify the Google Pub/Sub topic, Postgres database, GCS bucket or Kafka broker to publish blockchain data to:
|
||||
- For Google PubSub: `--output=projects/<your-project>/topics/crypto_ethereum`.
|
||||
Data will be pushed to `projects/<your-project>/topics/crypto_ethereum.blocks`, `projects/<your-project>/topics/crypto_ethereum.transactions` etc. topics.
|
||||
- For Postgres: `--output=postgresql+pg8000://<user>:<password>@<host>:<port>/<database_name>`,
|
||||
e.g. `--output=postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum`.
|
||||
- For GCS: `--output=gs://<bucket_name>`. Make sure to install and initialize `gcloud` cli.
|
||||
- For Kafka: `--output=kafka/<host>:<port>`, e.g. `--output=kafka/127.0.0.1:9092`
|
||||
- Those output types can be combined with a comma e.g. `--output=gs://<bucket_name>,projects/<your-project>/topics/crypto_ethereum`
|
||||
|
||||
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
|
||||
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
|
||||
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).
|
||||
- The command saves its state to `last_synced_block.txt` file where the last synced block number is saved periodically.
|
||||
- Specify either `--start-block` or `--last-synced-block-file` option. `--last-synced-block-file` should point to the
|
||||
file where the block number, from which to start streaming the blockchain data, is saved.
|
||||
- Use the `--lag` option to specify how many blocks to lag behind the head of the blockchain. It's the simplest way to
|
||||
handle chain reorganizations - they are less likely the further a block is from the head.
|
||||
- You can tune `--period-seconds`, `--batch-size`, `--block-batch-size`, `--max-workers` for performance.
|
||||
- Refer to [blockchain-etl-streaming](https://github.com/blockchain-etl/blockchain-etl-streaming) for
|
||||
instructions on deploying it to Kubernetes.
|
||||
|
||||
Stream blockchain data continually to Google Pub/Sub:
|
||||
|
||||
```bash
|
||||
> export GOOGLE_APPLICATION_CREDENTIALS=/path_to_credentials_file.json
|
||||
> ethereumetl stream --start-block 500000 --output projects/<your-project>/topics/crypto_ethereum
|
||||
```
|
||||
|
||||
Stream blockchain data to a Postgres database:
|
||||
|
||||
```bash
|
||||
ethereumetl stream --start-block 500000 --output postgresql+pg8000://<user>:<password>@<host>:5432/<database>
|
||||
```
|
||||
|
||||
The [schema](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/schema)
|
||||
and [indexes](https://github.com/blockchain-etl/ethereum-etl-postgres/tree/master/indexes) can be found in this
|
||||
repo [ethereum-etl-postgres](https://github.com/blockchain-etl/ethereum-etl-postgres).
|
||||
3
docs/contact.md
Normal file
3
docs/contact.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Contact
|
||||
|
||||
- [Telegram Group](https://t.me/joinchat/GsMpbA3mv1OJ6YMp3T5ORQ)
|
||||
11
docs/dockerhub.md
Normal file
11
docs/dockerhub.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Uploading to Docker Hub
|
||||
|
||||
```bash
|
||||
ETHEREUMETL_VERSION=1.11.0
|
||||
docker build -t ethereum-etl:${ETHEREUMETL_VERSION} -f Dockerfile .
|
||||
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
|
||||
docker push blockchainetl/ethereum-etl:${ETHEREUMETL_VERSION}
|
||||
|
||||
docker tag ethereum-etl:${ETHEREUMETL_VERSION} blockchainetl/ethereum-etl:latest
|
||||
docker push blockchainetl/ethereum-etl:latest
|
||||
```
|
||||
4
docs/ethereum-classic.md
Normal file
4
docs/ethereum-classic.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Ethereum Classic
|
||||
|
||||
To get ETC CSV files, pass the `--chain classic` parameter to every command that requires it.
|
||||
ETC export does not work with Infura: if your `--provider-uri` points to Infura, a warning is shown and the provider URI is changed to `https://ethereumclassic.network` instead. For faster performance, run a local Ethereum Classic client instead, such as `parity chain=classic` or Geth-classic.
|
||||
51
docs/exporting-the-blockchain.md
Normal file
51
docs/exporting-the-blockchain.md
Normal file
@@ -0,0 +1,51 @@
|
||||
## Exporting the Blockchain
|
||||
|
||||
1. Install python 3.5.3+: [https://www.python.org/downloads/](https://www.python.org/downloads/)
|
||||
|
||||
1. You can use Infura if you don't need ERC20 transfers (Infura doesn't support eth_getFilterLogs JSON RPC method).
|
||||
For that use `-p https://mainnet.infura.io` option for the commands below. If you need ERC20 transfers or want to
|
||||
export the data ~40 times faster, you will need to set up a local Ethereum node:
|
||||
|
||||
1. Install geth: [https://github.com/ethereum/go-ethereum/wiki/Installing-Geth](https://github.com/ethereum/go-ethereum/wiki/Installing-Geth)
|
||||
|
||||
1. Start geth.
|
||||
Make sure it downloaded the blocks that you need by executing `eth.syncing` in the JS console.
|
||||
You can export blocks below `currentBlock`,
|
||||
there is no need to wait until the full sync as the state is not needed (unless you also need contracts bytecode
|
||||
and token details; for those you need to wait until the full sync). Note that you may need to wait for another day or
|
||||
two for the node to download the states. See this issue [https://github.com/blockchain-etl/ethereum-etl/issues/265#issuecomment-970451522](https://github.com/blockchain-etl/ethereum-etl/issues/265#issuecomment-970451522).
|
||||
Make sure to set `--txlookuplimit 0` if you use geth.
|
||||
|
||||
1. Install Ethereum ETL: `> pip3 install ethereum-etl`
|
||||
|
||||
1. Export all:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_all --help
|
||||
> ethereumetl export_all -s 0 -e 5999999 -b 100000 -p file://$HOME/Library/Ethereum/geth.ipc -o output
|
||||
```
|
||||
|
||||
In case `ethereumetl` command is not available in PATH, use `python3 -m ethereumetl` instead.
|
||||
|
||||
The result will be in the `output` subdirectory, partitioned in Hive style:
|
||||
```bash
|
||||
output/blocks/start_block=00000000/end_block=00099999/blocks_00000000_00099999.csv
|
||||
output/blocks/start_block=00100000/end_block=00199999/blocks_00100000_00199999.csv
|
||||
...
|
||||
output/transactions/start_block=00000000/end_block=00099999/transactions_00000000_00099999.csv
|
||||
...
|
||||
output/token_transfers/start_block=00000000/end_block=00099999/token_transfers_00000000_00099999.csv
|
||||
...
|
||||
```
|
||||
|
||||
Should work with geth and parity, on Linux, Mac, Windows.
|
||||
If you use Parity you should disable warp mode with `--no-warp` option because warp mode
|
||||
does not place all of the block or receipt data into the database [https://wiki.parity.io/Getting-Synced](https://wiki.parity.io/Getting-Synced)
|
||||
|
||||
If you see weird behavior, e.g. wrong number of rows in the CSV files or corrupted files,
|
||||
check out this issue: https://github.com/medvedev1088/ethereum-etl/issues/28
|
||||
|
||||
### Export in 2 Hours
|
||||
|
||||
You can use AWS Auto Scaling and Data Pipeline to reduce the exporting time to a few hours.
|
||||
Read [this article](https://medium.com/@medvedev1088/how-to-export-the-entire-ethereum-blockchain-to-csv-in-2-hours-for-10-69fef511e9a2) for details.
|
||||
19
docs/google-bigquery.md
Normal file
19
docs/google-bigquery.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Google BigQuery
|
||||
|
||||
## Querying in BigQuery
|
||||
|
||||
If you'd rather not export the blockchain data yourself, we publish all tables as a public dataset in [BigQuery](https://medium.com/@medvedev1088/ethereum-blockchain-on-google-bigquery-283fb300f579).
|
||||
|
||||
Data is updated near real-time (~4-minute delay to account for block finality).
|
||||
|
||||
### How to Query Balances for all Ethereum Addresses
|
||||
|
||||
Read [this article](https://medium.com/google-cloud/how-to-query-balances-for-all-ethereum-addresses-in-bigquery-fb594e4034a7).
|
||||
|
||||
### Building Token Recommender in Google Cloud Platform
|
||||
|
||||
Read [this article](https://medium.com/google-cloud/building-token-recommender-in-google-cloud-platform-1be5a54698eb).
|
||||
|
||||
### Awesome BigQuery Views
|
||||
|
||||
[https://github.com/blockchain-etl/awesome-bigquery-views](https://github.com/blockchain-etl/awesome-bigquery-views)
|
||||
47
docs/index.md
Normal file
47
docs/index.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# Overview
|
||||
|
||||
Ethereum ETL lets you convert blockchain data into convenient formats like CSVs and relational databases.
|
||||
|
||||
With 1,700+ likes on GitHub, Ethereum ETL is the most popular open-source project for Ethereum data.
|
||||
|
||||
Data is available for you to query right away in [Google BigQuery](https://goo.gl/oY5BCQ).
|
||||
|
||||
## Features
|
||||
|
||||
Easily export:
|
||||
|
||||
* Blocks
|
||||
* Transactions
|
||||
* ERC20 / ERC721 tokens
|
||||
* Token transfers
|
||||
* Receipts
|
||||
* Logs
|
||||
* Contracts
|
||||
* Internal transactions (traces)
|
||||
|
||||
## Advanced Features
|
||||
|
||||
* Stream blockchain data to Pub/Sub, Postgres, or other destinations in real-time
|
||||
* Filter and transform data using flexible command-line options
|
||||
* Support for multiple Ethereum node providers (Geth, Parity, Infura, etc.)
|
||||
* Handles chain reorganizations through configurable lag
|
||||
* Export data by block range or by date
|
||||
* Scalable architecture with configurable batch sizes and worker counts
|
||||
|
||||
## Use Cases
|
||||
|
||||
* Data analysis and visualization
|
||||
* Machine learning on blockchain data
|
||||
* Building analytics dashboards
|
||||
* Market research and token analysis
|
||||
* Compliance and audit reporting
|
||||
* Academic research on blockchain economics
|
||||
|
||||
## Projects using Ethereum ETL
|
||||
* [Google](https://goo.gl/oY5BCQ) - Public BigQuery Ethereum datasets
|
||||
* [Nansen](https://nansen.ai/query?ref=ethereumetl) - Analytics platform for Ethereum
|
||||
* [Ethereum Blockchain ETL on GCP](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics) - Official Google Cloud reference architecture
|
||||
|
||||
## Getting Started
|
||||
|
||||
Check the [Quickstart](quickstart.md) guide to begin using Ethereum ETL or explore the [Commands](commands.md) page for detailed usage instructions.
|
||||
15
docs/limitations.md
Normal file
15
docs/limitations.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Limitations
|
||||
|
||||
- In case the contract is a proxy, which forwards all calls to a delegate, interface detection doesn’t work,
|
||||
which means `is_erc20` and `is_erc721` will always be false for proxy contracts and they will be missing in the `tokens`
|
||||
table.
|
||||
- The metadata methods (`symbol`, `name`, `decimals`, `total_supply`) for ERC20 are optional, so around 10% of the
|
||||
contracts are missing this data. Also some contracts (EOS) implement these methods but with the wrong return type,
|
||||
so the metadata columns are missing in this case as well.
|
||||
- `token_transfers.value`, `tokens.decimals` and `tokens.total_supply` have type `STRING` in BigQuery tables,
|
||||
because numeric types there can't handle 32-byte integers. You should use
|
||||
`cast(value as FLOAT64)` (possible loss of precision) or
|
||||
`safe_cast(value as NUMERIC)` (possible overflow) to convert to numbers.
|
||||
- The contracts that don't implement `decimals()` function but have the
|
||||
[fallback function](https://solidity.readthedocs.io/en/v0.4.21/contracts.html#fallback-function) that returns a `boolean`
|
||||
will have `0` or `1` in the `decimals` column in the CSVs.
|
||||
10
docs/media.md
Normal file
10
docs/media.md
Normal file
@@ -0,0 +1,10 @@
|
||||
## Ethereum ETL in the Media
|
||||
|
||||
- [A Technical Breakdown Of Google's New Blockchain Search Tools](https://www.forbes.com/sites/michaeldelcastillo/2019/02/05/google-launches-search-for-bitcoin-ethereum-bitcoin-cash-dash-dogecoin-ethereum-classic-litecoin-and-zcash/#394fc868c789)
|
||||
- [Navigating Bitcoin, Ethereum, XRP: How Google Is Quietly Making Blockchains Searchable](https://www.forbes.com/sites/michaeldelcastillo/2019/02/04/navigating-bitcoin-ethereum-xrp-how-google-is-quietly-making-blockchains-searchable/?ss=crypto-blockchain#49e111da4248)
|
||||
- [Ethereum in BigQuery: a Public Dataset for smart contract analytics](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-public-dataset-smart-contract-analytics)
|
||||
- [Ethereum in BigQuery: how we built this dataset](https://cloud.google.com/blog/products/data-analytics/ethereum-bigquery-how-we-built-dataset)
|
||||
- [Introducing six new cryptocurrencies in BigQuery Public Datasets—and how to analyze them](https://cloud.google.com/blog/products/data-analytics/introducing-six-new-cryptocurrencies-in-bigquery-public-datasets-and-how-to-analyze-them)
|
||||
- [Querying the Ethereum Blockchain in Snowflake](https://community.snowflake.com/s/article/Querying-the-Ethereum-Blockchain-in-Snowflake)
|
||||
- [ConsenSys Grants funds third cohort of projects to benefit the Ethereum ecosystem](https://www.cryptoninjas.net/2020/02/17/consensys-grants-funds-third-cohort-of-projects-to-benefit-the-ethereum-ecosystem/)
|
||||
- [Unlocking the Power of Google BigQuery (Cloud Next '19)](https://youtu.be/KL_i5XZIaJg?t=131)
|
||||
29
docs/pypi.md
Normal file
29
docs/pypi.md
Normal file
@@ -0,0 +1,29 @@
|
||||
# Uploading to PyPI
|
||||
|
||||
Create `$HOME/.pypirc` with the following content:
|
||||
|
||||
```
|
||||
[distutils]
|
||||
index-servers=
|
||||
testpypi
|
||||
pypi
|
||||
|
||||
[testpypi]
|
||||
repository = https://test.pypi.org/legacy/
|
||||
username = <username>
|
||||
password = <password>
|
||||
|
||||
[pypi]
|
||||
repository = https://upload.pypi.org/legacy/
|
||||
username = <username>
|
||||
password = <password>
|
||||
```
|
||||
|
||||
Then run:
|
||||
|
||||
```bash
|
||||
> python setup.py sdist
|
||||
> twine upload dist/* -r testpypi
|
||||
> pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple ethereum-etl
|
||||
```
|
||||
|
||||
45
docs/quickstart.md
Normal file
45
docs/quickstart.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Quickstart
|
||||
|
||||
Install Ethereum ETL:
|
||||
|
||||
```bash
|
||||
pip3 install ethereum-etl
|
||||
```
|
||||
|
||||
Export blocks and transactions:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_blocks_and_transactions --start-block 0 --end-block 500000 \
|
||||
--provider-uri https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c --blocks-output blocks.csv --transactions-output transactions.csv
|
||||
```
|
||||
|
||||
Export ERC20 and ERC721 transfers:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_token_transfers --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output token_transfers.csv
|
||||
```
|
||||
|
||||
Export traces:
|
||||
|
||||
```bash
|
||||
> ethereumetl export_traces --start-block 0 --end-block 500000 \
|
||||
--provider-uri file://$HOME/Library/Ethereum/parity.ipc --output traces.csv
|
||||
```
|
||||
|
||||
Stream blocks, transactions, logs, token_transfers continually to console:
|
||||
|
||||
```bash
|
||||
> pip3 install ethereum-etl[streaming]
|
||||
> ethereumetl stream --start-block 500000 -e block,transaction,log,token_transfer --log-file log.txt
|
||||
```
|
||||
|
||||
Find all commands [here](commands.md).
|
||||
|
||||
---
|
||||
|
||||
To run the latest version of Ethereum ETL, check out the repo and call:
|
||||
```bash
|
||||
> pip3 install -e .
|
||||
> python3 ethereumetl.py
|
||||
```
|
||||
167
docs/schema.md
Normal file
167
docs/schema.md
Normal file
@@ -0,0 +1,167 @@
|
||||
# Schema
|
||||
|
||||
## blocks.csv
|
||||
|
||||
Column | Type |
|
||||
------------------|--------------------|
|
||||
number | bigint |
|
||||
hash | hex_string |
|
||||
parent_hash | hex_string |
|
||||
nonce | hex_string |
|
||||
sha3_uncles | hex_string |
|
||||
logs_bloom | hex_string |
|
||||
transactions_root | hex_string |
|
||||
state_root | hex_string |
|
||||
receipts_root | hex_string |
|
||||
miner | address |
|
||||
difficulty | numeric |
|
||||
total_difficulty | numeric |
|
||||
size | bigint |
|
||||
extra_data | hex_string |
|
||||
gas_limit | bigint |
|
||||
gas_used | bigint |
|
||||
timestamp | bigint |
|
||||
transaction_count | bigint |
|
||||
base_fee_per_gas | bigint |
|
||||
withdrawals_root | string |
|
||||
withdrawals | string |
|
||||
blob_gas_used | bigint |
|
||||
excess_blob_gas | bigint |
|
||||
|
||||
---
|
||||
|
||||
## transactions.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------|-------------|
|
||||
hash | hex_string |
|
||||
nonce | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
transaction_index| bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
gas | bigint |
|
||||
gas_price | bigint |
|
||||
input | hex_string |
|
||||
block_timestamp | bigint |
|
||||
max_fee_per_gas | bigint |
|
||||
max_priority_fee_per_gas | bigint |
|
||||
transaction_type | bigint |
|
||||
max_fee_per_blob_gas | bigint |
|
||||
blob_versioned_hashes | string |
|
||||
|
||||
---
|
||||
|
||||
## token_transfers.csv
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
token_address | address |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
transaction_hash | hex_string |
|
||||
log_index | bigint |
|
||||
block_number | bigint |
|
||||
|
||||
---
|
||||
|
||||
## receipts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
cumulative_gas_used | bigint |
|
||||
gas_used | bigint |
|
||||
contract_address | address |
|
||||
root | hex_string |
|
||||
status | bigint |
|
||||
effective_gas_price | bigint |
|
||||
blob_gas_price | bigint |
|
||||
blob_gas_used | bigint |
|
||||
|
||||
---
|
||||
|
||||
## logs.csv
|
||||
|
||||
Column | Type |
|
||||
-------------------------|-------------|
|
||||
log_index | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
block_hash | hex_string |
|
||||
block_number | bigint |
|
||||
address | address |
|
||||
data | hex_string |
|
||||
topics | string |
|
||||
|
||||
---
|
||||
|
||||
## contracts.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
bytecode | hex_string |
|
||||
function_sighashes | string |
|
||||
is_erc20 | boolean |
|
||||
is_erc721 | boolean |
|
||||
block_number | bigint |
|
||||
|
||||
---
|
||||
|
||||
## tokens.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
address | address |
|
||||
symbol | string |
|
||||
name | string |
|
||||
decimals | bigint |
|
||||
total_supply | numeric |
|
||||
block_number | bigint |
|
||||
|
||||
---
|
||||
|
||||
## traces.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
block_number | bigint |
|
||||
transaction_hash | hex_string |
|
||||
transaction_index | bigint |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
input | hex_string |
|
||||
output | hex_string |
|
||||
trace_type | string |
|
||||
call_type | string |
|
||||
reward_type | string |
|
||||
gas | bigint |
|
||||
gas_used | bigint |
|
||||
subtraces | bigint |
|
||||
trace_address | string |
|
||||
error | string |
|
||||
status | bigint |
|
||||
trace_id | string |
|
||||
|
||||
### Differences between geth and parity traces.csv
|
||||
|
||||
- `to_address` field differs for `callcode` trace (geth seems to return correct value, as parity value of `to_address` is the same as `to_address` of parent call);
|
||||
- geth output doesn't have `reward` traces;
|
||||
- geth output doesn't have `to_address`, `from_address`, `value` for `suicide` traces;
|
||||
- `error` field contains a human-readable error message, which might differ in geth/parity output;
|
||||
- geth output doesn't have `transaction_hash`;
|
||||
- `gas_used` is 0 on traces with error in geth, empty in parity;
|
||||
- zero output of subcalls is `0x000...` in geth, `0x` in parity;
|
||||
|
||||
You can find column descriptions in [https://github.com/medvedev1088/ethereum-etl-airflow](https://github.com/medvedev1088/ethereum-etl-airflow/tree/master/dags/resources/stages/raw/schemas)
|
||||
|
||||
Note: for the `address` type all hex characters are lower-cased.
|
||||
`boolean` type can have 2 values: `True` or `False`.
|
||||
@@ -1 +0,0 @@
|
||||
from . import evmdasm
|
||||
@@ -1,582 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Author : <github.com/tintinweb>
|
||||
# from future import print_function
|
||||
"""
|
||||
Verbose EthereumVM Disassembler
|
||||
|
||||
OPCODES taken from:
|
||||
https://github.com/ethereum/go-ethereum/blob/master/core/vm/opcodes.go
|
||||
https://github.com/ethereum/yellowpaper/blob/master/Paper.tex
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import itertools
|
||||
import time
|
||||
import requests
|
||||
|
||||
try:
|
||||
import ethereum_input_decoder
|
||||
except ImportError:
|
||||
ethereum_input_decoder = None
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def hex_decode(s):
    """Decode a hex string (no ``0x`` prefix) into ASCII text.

    :param s: string of hexadecimal digits, e.g. ``"414243"``.
    :return: the decoded ASCII string, or ``''`` when *s* is not valid hex
             or the decoded bytes are not pure ASCII.
    """
    try:
        return bytes.fromhex(s).decode('ascii')
    except (NameError, AttributeError):
        # Python 2 fallback: ``bytes`` has no ``fromhex`` there.
        return s.decode("hex")
    except ValueError:
        # Covers malformed hex (odd length / non-hex digits) as well as
        # UnicodeDecodeError, which is a ValueError subclass (non-ASCII bytes).
        return ''  # invalid
|
||||
|
||||
|
||||
def is_ascii_subsequence(s, min_percent=0.51):
    """Tell whether *s* consists mostly of visible ASCII characters.

    A character counts when its code point lies strictly between 0x20 and
    128, so the space character itself is not counted.

    :param s: string to inspect.
    :param min_percent: minimum fraction (0..1) of counted characters.
    :return: ``False`` for an empty string, otherwise whether the fraction
             of counted characters is at least *min_percent*.
    """
    total = len(s)
    if not total:
        return False
    visible = sum(1 for ch in s if 0x20 < ord(ch) < 128)
    return visible / float(total) >= min_percent
|
||||
|
||||
|
||||
# memcache for lookup_function_signature: sighash -> list of signatures
cache_lookup_function_signature = {}


def lookup_function_signature(sighash):
    """Return the candidate function signatures for a 4-byte *sighash*.

    Results are memoized in ``cache_lookup_function_signature``. Empty
    results are cached too, so a sighash without any match is only looked up
    once instead of on every call.

    :param sighash: the selector as passed on to
                    ``FourByteDirectory.lookup_signatures``.
    :return: list of signatures; ``[]`` when ``ethereum_input_decoder`` is
             not available or nothing matched.
    """
    if not ethereum_input_decoder:
        return []
    # Membership test rather than truthiness: a cached empty list is a valid hit.
    if sighash not in cache_lookup_function_signature:
        cache_lookup_function_signature[sighash] = list(
            ethereum_input_decoder.decoder.FourByteDirectory.lookup_signatures(sighash))
    return cache_lookup_function_signature[sighash]
|
||||
|
||||
|
||||
class EthJsonRpc(object):
    """Minimal JSON-RPC 2.0 client that posts requests over one ``requests`` session."""

    def __init__(self, url):
        """Remember the endpoint *url*, start request ids at 1 and open a session."""
        self.url, self.id = url, 1
        self.session = requests.session()

    def call(self, method, params=None):
        """POST one JSON-RPC request and return the decoded JSON response.

        :param method: JSON-RPC method name.
        :param params: list of positional parameters; defaults to ``[]``.
        :return: whatever ``response.json()`` yields for the reply.
        """
        payload = dict(jsonrpc='2.0', method=method, params=params or [], id=self.id)
        response = self.session.post(
            self.url,
            headers={'Content-Type': 'application/json'},
            json=payload,
        )

        # The id is bumped only after the POST returned.
        self.id += 1
        return response.json()
|
||||
|
||||
|
||||
class BasicBlock(object):
    """A named run of instructions that starts at a given address."""

    def __init__(self, address=None, name=None, instructions=None):
        """Store *address* and *name*; a missing or empty *instructions* becomes a fresh list."""
        self.address = address
        self.name = name
        self.instructions = instructions if instructions else []

    def __repr__(self):
        count = len(self.instructions)
        return "<BasicBlock 0x%x instructions:%d>" % (self.address, count)
|
||||
|
||||
|
||||
class Instruction(object):
    """Base Instruction class.

    Describes one EVM instruction: opcode byte, mnemonic, number of immediate
    operand bytes and a human readable description. ``operand`` holds the
    immediate bytes as a hex string without the ``0x`` prefix.

    Instances are meant to be doubly linked through ``previous`` / ``next``.
    ``address``, ``next``, ``previous`` and ``basicblock`` start out as
    ``None`` and are presumably filled in by the disassembler (not set inside
    this class).
    """

    def __init__(self, opcode, name, length_of_operand=0, description=None):
        """
        :param opcode: numeric opcode byte.
        :param name: mnemonic, e.g. ``'PUSH1'``.
        :param length_of_operand: number of immediate bytes following the opcode.
        :param description: free-text description of the instruction.
        """
        self.opcode, self.name, self.length_of_operand = opcode, name, length_of_operand
        self.operand = ''
        self.description = description
        self.address = None
        self.next = None
        self.previous = None
        self.xrefs = set([])
        self.jumpto = None
        self.basicblock = None

    def __repr__(self):
        # Templates (and anything not yet placed) have no address; hex(None)
        # would raise TypeError, so show ``None`` instead.
        address = hex(self.address) if self.address is not None else None
        return "<%s name=%s address=%s size=%d>" % (self.__class__.__name__, self.name, address, self.size())

    def __str__(self):
        return "%s %s" % (self.name, "0x%s" % self.operand if self.operand else '')

    def size(self):
        """Return the encoded size in bytes: one opcode byte plus the operand bytes."""
        return 1 + len(self.operand) // 2  # opcode + operand

    def consume(self, bytecode):
        """Clone this instruction and read its operand from *bytecode*.

        :param bytecode: iterator yielding integer byte values; up to
                         ``length_of_operand`` items are taken from it.
        :return: a new ``Instruction`` carrying the consumed operand.
        """
        # clone
        m = Instruction(opcode=self.opcode,
                        name=self.name,
                        length_of_operand=self.length_of_operand,
                        description=self.description)
        # consume
        m.operand = ''.join('%0.2x' % byte for byte in itertools.islice(bytecode, m.length_of_operand))
        return m

    def serialize(self):
        """Return opcode and operand as one hex string (no ``0x`` prefix)."""
        return '%0.2x' % self.opcode + self.operand

    def describe_operand(self, resolve_funcsig=False):
        """Return the operand as ``0x…`` text with optional annotations.

        :param resolve_funcsig: when true, 4-byte operands located below
                                address 0x100 are looked up as potential
                                function signatures.
        :return: ``''`` without operand; otherwise ``0x<operand>`` followed by
                 a function-signature or ASCII annotation (when applicable)
                 and ``@<target>`` when ``jumpto`` is set to a non-zero value.
        """
        if not self.operand:
            str_operand = ''
        elif resolve_funcsig and len(self.operand) == 8 and self.address < 0x100:
            # speed improvement: only attempt lookups below address 0x100
            # (the original note mentioned "addr 400"; the check is 0x100)
            # 4 bytes, could be a func-sig
            pot_funcsigs = lookup_function_signature(self.operand)
            if len(pot_funcsigs) == 0:
                annotation = ''
            elif len(pot_funcsigs) == 1:
                annotation = " ('function %s')" % pot_funcsigs[0]
            else:
                annotation = " (*ambiguous* 'function %s')" % pot_funcsigs[0]

            str_operand = "0x%s%s" % (self.operand, annotation)
        elif len(self.operand) > 8:
            # decode once and reuse for both the check and the annotation
            decoded = hex_decode(self.operand)
            annotation = ' (%r)' % decoded if is_ascii_subsequence(decoded) else ''
            str_operand = "0x%s%s" % (self.operand, annotation)
        else:
            str_operand = "0x%s" % self.operand

        # NOTE: a jump target of 0 is falsy and therefore not rendered.
        extra = "@%s" % hex(self.jumpto) if self.jumpto else ''
        return "%s%s" % (str_operand, extra)
|
||||
|
||||
|
||||
# Table of instruction templates, one per known opcode.
# ``length_of_operand`` is the number of immediate bytes that follow the
# opcode in the bytecode; ``Instruction.consume`` reads that many bytes.
# Only PUSH1..PUSH32 carry immediate data. MSTORE8 and LOG0..LOG4 take all
# their arguments from the stack, so their operand length is 0.
OPCODES = [
    # Stop and Arithmetic Operations
    Instruction(opcode=0x00, name='STOP', description="Halts execution."),
    Instruction(opcode=0x01, name='ADD', description="Addition operation."),
    Instruction(opcode=0x02, name='MUL', description="Multiplication operation."),
    Instruction(opcode=0x03, name='SUB', description="Subtraction operation."),
    Instruction(opcode=0x04, name='DIV', description="Integer division operation."),
    Instruction(opcode=0x05, name='SDIV', description="Signed integer"),
    Instruction(opcode=0x06, name='MOD', description="Modulo"),
    Instruction(opcode=0x07, name='SMOD', description="Signed modulo"),
    Instruction(opcode=0x08, name='ADDMOD', description="Modulo"),
    Instruction(opcode=0x09, name='MULMOD', description="Modulo"),
    Instruction(opcode=0x0a, name='EXP', description="Exponential operation."),
    Instruction(opcode=0x0b, name='SIGNEXTEND', description="Extend length of two’s complement signed integer."),

    # Comparison & Bitwise Logic Operations
    Instruction(opcode=0x10, name='LT', description="Lesser-than comparison"),
    Instruction(opcode=0x11, name='GT', description="Greater-than comparison"),
    Instruction(opcode=0x12, name='SLT', description="Signed less-than comparison"),
    Instruction(opcode=0x13, name='SGT', description="Signed greater-than comparison"),
    Instruction(opcode=0x14, name='EQ', description="Equality comparison"),
    Instruction(opcode=0x15, name='ISZERO', description="Simple not operator"),
    Instruction(opcode=0x16, name='AND', description="Bitwise AND operation."),
    Instruction(opcode=0x17, name='OR', description="Bitwise OR operation."),
    Instruction(opcode=0x18, name='XOR', description="Bitwise XOR operation."),
    Instruction(opcode=0x19, name='NOT', description="Bitwise NOT operation."),
    Instruction(opcode=0x1a, name='BYTE', description="Retrieve single byte from word"),

    # SHA3
    Instruction(opcode=0x20, name='SHA3', description="Compute Keccak-256 hash."),

    # Environmental Information
    Instruction(opcode=0x30, name='ADDRESS', description="Get address of currently executing account."),
    Instruction(opcode=0x31, name='BALANCE', description="Get balance of the given account."),
    Instruction(opcode=0x32, name='ORIGIN', description="Get execution origination address."),
    Instruction(opcode=0x33, name='CALLER',
                description="Get caller address.This is the address of the account that is directly responsible for this execution."),
    Instruction(opcode=0x34, name='CALLVALUE',
                description="Get deposited value by the instruction/transaction responsible for this execution."),
    Instruction(opcode=0x35, name='CALLDATALOAD', description="Get input data of current environment."),
    Instruction(opcode=0x36, name='CALLDATASIZE', description="Get size of input data in current environment."),
    Instruction(opcode=0x37, name='CALLDATACOPY',
                description="Copy input data in current environment to memory. This pertains to the input data passed with the message call instruction or transaction."),
    Instruction(opcode=0x38, name='CODESIZE', description="Get size of code running in current environment."),
    Instruction(opcode=0x39, name='CODECOPY', description="Copy code running in current environment to memory."),
    Instruction(opcode=0x3a, name='GASPRICE', description="Get price of gas in current environment."),
    Instruction(opcode=0x3b, name='EXTCODESIZE', description="Get size of an account’s code."),
    Instruction(opcode=0x3c, name='EXTCODECOPY', description="Copy an account’s code to memory."),
    Instruction(opcode=0x3d, name='RETURNDATASIZE',
                description="Push the size of the return data buffer onto the stack."),
    Instruction(opcode=0x3e, name='RETURNDATACOPY', description="Copy data from the return data buffer."),

    # Block Information
    Instruction(opcode=0x40, name='BLOCKHASH',
                description="Get the hash of one of the 256 most recent complete blocks."),
    Instruction(opcode=0x41, name='COINBASE', description="Get the block’s beneficiary address."),
    Instruction(opcode=0x42, name='TIMESTAMP', description="Get the block’s timestamp."),
    Instruction(opcode=0x43, name='NUMBER', description="Get the block’s number."),
    Instruction(opcode=0x44, name='DIFFICULTY', description="Get the block’s difficulty."),
    Instruction(opcode=0x45, name='GASLIMIT', description="Get the block’s gas limit."),

    # Stack, Memory, Storage and Flow Operations
    Instruction(opcode=0x50, name='POP', description="Remove item from stack."),
    Instruction(opcode=0x51, name='MLOAD', description="Load word from memory."),
    Instruction(opcode=0x52, name='MSTORE', description="Save word to memory."),
    # MSTORE8 stores 8 *bits*; it has no immediate operand bytes.
    Instruction(opcode=0x53, name='MSTORE8', description="Save byte to memory."),
    Instruction(opcode=0x54, name='SLOAD', description="Load word from storage."),
    Instruction(opcode=0x55, name='SSTORE', description="Save word to storage."),
    Instruction(opcode=0x56, name='JUMP', description="Alter the program counter."),
    Instruction(opcode=0x57, name='JUMPI', description="Conditionally alter the program counter."),
    Instruction(opcode=0x58, name='PC', description="Get the value of the program counter prior to the increment."),
    Instruction(opcode=0x59, name='MSIZE', description="Get the size of active memory in bytes."),
    Instruction(opcode=0x5a, name='GAS',
                description="Get the amount of available gas, including the corresponding reduction"),
    Instruction(opcode=0x5b, name='JUMPDEST', description="Mark a valid destination for jumps."),

    # Stack Push Operations
    Instruction(opcode=0x60, name='PUSH1', length_of_operand=0x1, description="Place 1 byte item on stack."),
    Instruction(opcode=0x61, name='PUSH2', length_of_operand=0x2, description="Place 2-byte item on stack."),
    Instruction(opcode=0x62, name='PUSH3', length_of_operand=0x3, description="Place 3-byte item on stack."),
    Instruction(opcode=0x63, name='PUSH4', length_of_operand=0x4, description="Place 4-byte item on stack."),
    Instruction(opcode=0x64, name='PUSH5', length_of_operand=0x5, description="Place 5-byte item on stack."),
    Instruction(opcode=0x65, name='PUSH6', length_of_operand=0x6, description="Place 6-byte item on stack."),
    Instruction(opcode=0x66, name='PUSH7', length_of_operand=0x7, description="Place 7-byte item on stack."),
    Instruction(opcode=0x67, name='PUSH8', length_of_operand=0x8, description="Place 8-byte item on stack."),
    Instruction(opcode=0x68, name='PUSH9', length_of_operand=0x9, description="Place 9-byte item on stack."),
    Instruction(opcode=0x69, name='PUSH10', length_of_operand=0xa, description="Place 10-byte item on stack."),
    Instruction(opcode=0x6a, name='PUSH11', length_of_operand=0xb, description="Place 11-byte item on stack."),
    Instruction(opcode=0x6b, name='PUSH12', length_of_operand=0xc, description="Place 12-byte item on stack."),
    Instruction(opcode=0x6c, name='PUSH13', length_of_operand=0xd, description="Place 13-byte item on stack."),
    Instruction(opcode=0x6d, name='PUSH14', length_of_operand=0xe, description="Place 14-byte item on stack."),
    Instruction(opcode=0x6e, name='PUSH15', length_of_operand=0xf, description="Place 15-byte item on stack."),
    Instruction(opcode=0x6f, name='PUSH16', length_of_operand=0x10, description="Place 16-byte item on stack."),
    Instruction(opcode=0x70, name='PUSH17', length_of_operand=0x11, description="Place 17-byte item on stack."),
    Instruction(opcode=0x71, name='PUSH18', length_of_operand=0x12, description="Place 18-byte item on stack."),
    Instruction(opcode=0x72, name='PUSH19', length_of_operand=0x13, description="Place 19-byte item on stack."),
    Instruction(opcode=0x73, name='PUSH20', length_of_operand=0x14, description="Place 20-byte item on stack."),
    Instruction(opcode=0x74, name='PUSH21', length_of_operand=0x15, description="Place 21-byte item on stack."),
    Instruction(opcode=0x75, name='PUSH22', length_of_operand=0x16, description="Place 22-byte item on stack."),
    Instruction(opcode=0x76, name='PUSH23', length_of_operand=0x17, description="Place 23-byte item on stack."),
    Instruction(opcode=0x77, name='PUSH24', length_of_operand=0x18, description="Place 24-byte item on stack."),
    Instruction(opcode=0x78, name='PUSH25', length_of_operand=0x19, description="Place 25-byte item on stack."),
    Instruction(opcode=0x79, name='PUSH26', length_of_operand=0x1a, description="Place 26-byte item on stack."),
    Instruction(opcode=0x7a, name='PUSH27', length_of_operand=0x1b, description="Place 27-byte item on stack."),
    Instruction(opcode=0x7b, name='PUSH28', length_of_operand=0x1c, description="Place 28-byte item on stack."),
    Instruction(opcode=0x7c, name='PUSH29', length_of_operand=0x1d, description="Place 29-byte item on stack."),
    Instruction(opcode=0x7d, name='PUSH30', length_of_operand=0x1e, description="Place 30-byte item on stack."),
    Instruction(opcode=0x7e, name='PUSH31', length_of_operand=0x1f, description="Place 31-byte item on stack."),
    Instruction(opcode=0x7f, name='PUSH32', length_of_operand=0x20,
                description="Place 32-byte (full word) item on stack."),

    # Duplication Operations
    Instruction(opcode=0x80, name='DUP1', description="Duplicate 1st stack item."),
    Instruction(opcode=0x81, name='DUP2', description="Duplicate 2nd stack item."),
    Instruction(opcode=0x82, name='DUP3', description="Duplicate 3rd stack item."),
    Instruction(opcode=0x83, name='DUP4', description="Duplicate 4th stack item."),
    Instruction(opcode=0x84, name='DUP5', description="Duplicate 5th stack item."),
    Instruction(opcode=0x85, name='DUP6', description="Duplicate 6th stack item."),
    Instruction(opcode=0x86, name='DUP7', description="Duplicate 7th stack item."),
    Instruction(opcode=0x87, name='DUP8', description="Duplicate 8th stack item."),
    Instruction(opcode=0x88, name='DUP9', description="Duplicate 9th stack item."),
    Instruction(opcode=0x89, name='DUP10', description="Duplicate 10th stack item."),
    Instruction(opcode=0x8a, name='DUP11', description="Duplicate 11th stack item."),
    Instruction(opcode=0x8b, name='DUP12', description="Duplicate 12th stack item."),
    Instruction(opcode=0x8c, name='DUP13', description="Duplicate 13th stack item."),
    Instruction(opcode=0x8d, name='DUP14', description="Duplicate 14th stack item."),
    Instruction(opcode=0x8e, name='DUP15', description="Duplicate 15th stack item."),
    Instruction(opcode=0x8f, name='DUP16', description="Duplicate 16th stack item."),

    # Exchange Operations
    Instruction(opcode=0x90, name='SWAP1', description="Exchange 1st and 2nd stack items."),
    Instruction(opcode=0x91, name='SWAP2', description="Exchange 1st and 3rd stack items."),
    Instruction(opcode=0x92, name='SWAP3', description="Exchange 1st and 4th stack items."),
    Instruction(opcode=0x93, name='SWAP4', description="Exchange 1st and 5th stack items."),
    Instruction(opcode=0x94, name='SWAP5', description="Exchange 1st and 6th stack items."),
    Instruction(opcode=0x95, name='SWAP6', description="Exchange 1st and 7th stack items."),
    Instruction(opcode=0x96, name='SWAP7', description="Exchange 1st and 8th stack items."),
    Instruction(opcode=0x97, name='SWAP8', description="Exchange 1st and 9th stack items."),
    Instruction(opcode=0x98, name='SWAP9', description="Exchange 1st and 10th stack items."),
    Instruction(opcode=0x99, name='SWAP10', description="Exchange 1st and 11th stack items."),
    Instruction(opcode=0x9a, name='SWAP11', description="Exchange 1st and 12th stack items."),
    Instruction(opcode=0x9b, name='SWAP12', description="Exchange 1st and 13th stack items."),
    Instruction(opcode=0x9c, name='SWAP13', description="Exchange 1st and 14th stack items."),
    Instruction(opcode=0x9d, name='SWAP14', description="Exchange 1st and 15th stack items."),
    Instruction(opcode=0x9e, name='SWAP15', description="Exchange 1st and 16th stack items."),
    Instruction(opcode=0x9f, name='SWAP16', description="Exchange 1st and 17th stack items."),

    # Logging Operations (topics are stack arguments, not immediate bytes)
    Instruction(opcode=0xa0, name='LOG0', length_of_operand=0x0, description="Append log record with no topics."),
    Instruction(opcode=0xa1, name='LOG1', length_of_operand=0x0, description="Append log record with one topic."),
    Instruction(opcode=0xa2, name='LOG2', length_of_operand=0x0, description="Append log record with two topics."),
    Instruction(opcode=0xa3, name='LOG3', length_of_operand=0x0, description="Append log record with three topics."),
    Instruction(opcode=0xa4, name='LOG4', length_of_operand=0x0, description="Append log record with four topics."),

    # System Operations
    Instruction(opcode=0xf0, name='CREATE', description="Create a new account with associated code."),
    Instruction(opcode=0xf1, name='CALL', description="Message-call into an account."),
    Instruction(opcode=0xf2, name='CALLCODE',
                description="Message-call into this account with alternative account’s code."),
    Instruction(opcode=0xf3, name='RETURN', description="Halt execution returning output data."),

    # Newer opcode
    Instruction(opcode=0xfd, name='REVERT', description='throw an error'),

    # Halt Execution, Mark for deletion
    Instruction(opcode=0xff, name='SUICIDE', description="Halt execution and register account for later deletion."),
]
|
||||
|
||||
# Mnemonics treated as the last instruction of a basic block.
# NOTE(review): presumably consulted when splitting into basic blocks; the
# consumer is not visible in this part of the file — confirm.
OPCODE_MARKS_BASICBLOCK_END = ['JUMP', 'JUMPI', 'STOP', 'RETURN']
|
||||
|
||||
|
||||
class EVMCode(object):
|
||||
def __init__(self, debug=False):
    """Create an empty code object backed by an ``EVMDisAssembler``.

    :param debug: passed through to ``EVMDisAssembler``.
    """
    # lookup tables
    self.instruction_at = {}  # address:instruction
    self.name_for_address = {}  # address:name
    self.xrefs = {}  # address:set(ref instruction,ref instruction)

    # nothing disassembled yet
    self.first = self.last = self.duration = None

    self.dis = EVMDisAssembler(debug=debug)
|
||||
|
||||
def assemble(self, instructions):
|
||||
return '0x' + ''.join(inst.serialize() for inst in instructions)
|
||||
|
||||
def _iter(self, first=None):
|
||||
current = first or self.first
|
||||
yield current
|
||||
while current.next:
|
||||
current = current.next
|
||||
yield current
|
||||
|
||||
def disassemble(self, bytecode=None):
|
||||
"""
|
||||
for inst in self.dis.disassemble(bytecode):
|
||||
# return them as we process them
|
||||
yield inst
|
||||
"""
|
||||
if bytecode:
|
||||
t_start = time.time()
|
||||
disasm = list(self.dis.disassemble(bytecode))
|
||||
self.first = disasm[0]
|
||||
self.last = disasm[-1]
|
||||
self._update_address_space(self.first)
|
||||
self._update_xrefs()
|
||||
self.duration = time.time() - t_start
|
||||
|
||||
# current = self.first
|
||||
return self._iter()
|
||||
|
||||
def _update_address_space(self, first):
|
||||
for instruction in self._iter(first):
|
||||
self.instruction_at[instruction.address] = instruction
|
||||
|
||||
def _update_xrefs(self):
|
||||
# find all JUMP, JUMPI's
|
||||
for loc, instruction in ((l, i) for l, i in self.instruction_at.items() if i.name in ("JUMP", "JUMPI")):
|
||||
if instruction.previous and instruction.previous.name.startswith("PUSH"):
|
||||
instruction.jumpto = int(instruction.previous.operand, 16)
|
||||
target_instruction = self.instruction_at.get(instruction.jumpto)
|
||||
if target_instruction and target_instruction.name == "JUMPDEST":
|
||||
# valid address, valid target
|
||||
self.xrefs.setdefault(instruction.jumpto, set([]))
|
||||
self.xrefs[instruction.jumpto] = instruction
|
||||
target_instruction.xrefs.add(instruction)
|
||||
|
||||
def basicblocks(self, disasm):
|
||||
# listify it in order to resolve xrefs, jumps
|
||||
current_basicblock = BasicBlock(address=0, name="init")
|
||||
|
||||
for i, nm in enumerate(disasm):
|
||||
if nm.name == "JUMPDEST":
|
||||
# jumpdest belongs tto the new basicblock (marks the start)
|
||||
yield current_basicblock
|
||||
current_basicblock = BasicBlock(address=nm.address, name="loc_%s"% hex(nm.address))
|
||||
|
||||
# add to current basicblock
|
||||
current_basicblock.instructions.append(nm)
|
||||
nm.basicblock = current_basicblock
|
||||
# yield the last basicblock
|
||||
yield current_basicblock
|
||||
|
||||
|
||||
class EVMDisAssembler(object):
    """Turns hex-encoded EVM bytecode into a doubly linked chain of instructions.

    `errors` collects one message per unknown opcode seen before the first
    STOP instruction.
    """

    # opcode value -> Instruction template
    OPCODE_TABLE = {template.opcode: template for template in OPCODES}

    def __init__(self, debug=False):
        self.errors = []
        self.debug = debug

    def disassemble(self, bytecode):
        """ Disassemble evm bytecode to a Instruction objects """

        def iterbytes(text):
            # Keep only hex digits (and 'x'); a "0x" pair fails int() below
            # and is skipped with a warning.
            hex_chars = (ch for ch in text if ch in '1234567890abcdefABCDEFx')
            # zip() over the same iterator twice consumes the characters two
            # at a time; a trailing unpaired character is dropped.
            for high, low in zip(hex_chars, hex_chars):
                pair = high + low
                try:
                    yield int(pair, 16)
                except ValueError:
                    logger.warning("skipping invalid byte: %r" % (pair,))

        address = 0
        predecessor = None
        seen_stop = False
        byte_stream = iterbytes(bytecode)

        for opcode in byte_stream:
            logger.debug(opcode)
            try:
                # consume() receives the shared byte iterator -- presumably so
                # it can pull the operand bytes; confirm in Instruction.
                instruction = self.OPCODE_TABLE[opcode].consume(byte_stream)
            except KeyError as ke:
                instruction = Instruction(opcode=opcode,
                                          name="UNKNOWN_%s" % hex(opcode),
                                          description="Invalid opcode")
                # Unknown bytes after the first STOP are not reported.
                if not seen_stop:
                    msg = "error: byte at address %d (%s) is not a valid operator" % (address, hex(opcode))
                    if self.debug:
                        logger.exception(msg)
                    self.errors.append("%s; %r" % (msg, ke))

            if instruction.name == 'STOP':
                seen_stop = True

            instruction.address = address
            address += instruction.size()

            # link the new instruction into the chain in both directions
            instruction.previous = predecessor
            if predecessor:
                predecessor.next = instruction
            predecessor = instruction

            yield instruction

    def assemble(self, instructions):
        """ Assemble a list of Instruction() objects to evm bytecode"""
        for item in instructions:
            yield item.serialize()
|
||||
|
||||
|
||||
class EVMDasmPrinter:
    """Static helpers that print a disassembly in different text formats."""

    @staticmethod
    def _print_table_header():
        # Column titles followed by a 150-character rule.
        print("%-3s %-4s %-3s %-15s %-36s %-30s %s" % (
            "Inst", "addr", " hex ", "mnemonic", "operand", "xrefs", "description"))
        print("-" * 150)

    @staticmethod
    def _print_table_row(index, nm, resolve_funcsig):
        # Any failure while formatting a row is printed instead of raised, so
        # one bad instruction does not abort the whole listing.
        try:
            if nm.xrefs:
                xref_text = ','.join('%s@%s' % (ref.name, hex(ref.address)) for ref in nm.xrefs)
            else:
                xref_text = ''
            row = (index, nm.address, nm.address, nm.name,
                   nm.describe_operand(resolve_funcsig=resolve_funcsig),
                   xref_text,
                   nm.description)
            print("%4d [%3d 0x%0.3x] %-15s %-36s %-30s # %s" % row)
        except Exception as e:
            print(e)

    @staticmethod
    def listing(disasm):
        """Print one "<mnemonic> <operand>" line per instruction."""
        for nm in disasm:
            print("%s %s" % (nm.name, nm.operand))

    @staticmethod
    def detailed(disasm, resolve_funcsig=False):
        """Print a table of all instructions with a ":loc_<addr>" label before each JUMPDEST."""
        EVMDasmPrinter._print_table_header()
        for index, nm in enumerate(disasm):
            if nm.name == "JUMPDEST":
                print(":loc_%s" % hex(nm.address))
            EVMDasmPrinter._print_table_row(index, nm, resolve_funcsig)
            if nm.name in OPCODE_MARKS_BASICBLOCK_END:
                print("")

    @staticmethod
    def basicblocks_detailed(basicblocks, resolve_funcsig=False):
        """Print the same table grouped by basic block, one ":loc_<addr>" label per block."""
        EVMDasmPrinter._print_table_header()

        index = 0  # running instruction number across all blocks
        for bb in basicblocks:
            print(":loc_%s" % hex(bb.address))
            for nm in bb.instructions:
                EVMDasmPrinter._print_table_row(index, nm, resolve_funcsig)
                index += 1
                if nm.name in OPCODE_MARKS_BASICBLOCK_END:
                    print("")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read EVM bytecode, print its disassembly, exit.

    The bytecode comes from (in order of precedence) the contract at
    `--address`, standard input when no positional argument is given, the
    file named by the first argument, or the first argument itself.
    The process exit status is the number of disassembly errors.
    """
    logging.basicConfig(format="%(levelname)-7s - %(message)s")
    from optparse import OptionParser

    usage = ("usage: %prog [options]\n"
             "\n"
             "example: %prog [-L -F -v] <file_or_bytecode>\n"
             "%prog [-L -F -v] # read from stdin\n"
             "%prog [-L -F -a <address>] # fetch contract code from infura.io\n")
    parser = OptionParser(usage=usage)
    loglevels = ['CRITICAL', 'FATAL', 'ERROR', 'WARNING', 'WARN', 'INFO', 'DEBUG', 'NOTSET']
    parser.add_option("-v", "--verbosity", default="critical",
                      help="available loglevels: %s [default: %%default]" % ','.join(l.lower() for l in loglevels))
    parser.add_option("-L", "--listing", action="store_true", dest="listing",
                      help="disables table mode, outputs assembly only")
    parser.add_option("-F", "--no-online-lookup", action="store_false", default=True, dest="function_signature_lookup",
                      help="disable online function signature lookup")
    parser.add_option("-a", "--address",
                      help="fetch contract bytecode from address")

    # parse args
    (options, args) = parser.parse_args()

    if options.verbosity.upper() in loglevels:
        # from here on options.verbosity holds the numeric logging level
        options.verbosity = getattr(logging, options.verbosity.upper())
        logger.setLevel(options.verbosity)
    else:
        parser.error("invalid verbosity selected. please check --help")

    if options.function_signature_lookup and not ethereum_input_decoder:
        logger.warning("ethereum_input_decoder package not installed. function signature lookup not available.(pip install ethereum-input-decoder)")

    # get bytecode from stdin, or arg:file or arg:bytecode
    if options.address:
        api = EthJsonRpc("https://mainnet.infura.io/")
        evmcode = api.call(method="eth_getCode", params=[options.address, "latest"])["result"]
    elif not args:
        evmcode = sys.stdin.read()
    elif os.path.isfile(args[0]):
        # close the file deterministically instead of leaking the handle
        with open(args[0], 'r') as bytecode_file:
            evmcode = bytecode_file.read()
    else:
        evmcode = args[0]

    # init analyzer
    # NOTE(review): `debug` receives the numeric log level, which is truthy
    # for every level except NOTSET -- confirm whether a bool was intended.
    evm_dasm = EVMCode(debug=options.verbosity)
    logger.debug(EVMDisAssembler.OPCODE_TABLE)

    # print disassembly
    if options.listing:
        EVMDasmPrinter.listing(evm_dasm.disassemble(evmcode))
    else:
        EVMDasmPrinter.basicblocks_detailed(evm_dasm.basicblocks(evm_dasm.disassemble(evmcode)),
                                            resolve_funcsig=options.function_signature_lookup)

    logger.info("finished in %0.3f seconds." % evm_dasm.duration)
    # post a notification that disassembly might be incorrect due to errors
    if evm_dasm.dis.errors:
        logger.warning("disassembly finished with %d errors" % len(evm_dasm.dis.errors))
        if options.verbosity >= logging.WARNING:
            logger.warning("use -v INFO to see the errors")
        else:
            for e in evm_dasm.dis.errors:
                logger.info(e)

    # quick check: re-assembling the disassembly should give back the input.
    # The message needs a %s placeholder for the result; without it logging
    # reports a formatting error instead of the comparison.
    logger.debug("assemble(disassemble(evmcode))==%s",
                 evmcode.strip() == ''.join(evm_dasm.assemble(evm_dasm.disassemble())))
    sys.exit(len(evm_dasm.dis.errors))
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
26
ethereumetl.py
Normal file
26
ethereumetl.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli import cli
|
||||
|
||||
# Invoke the command group imported above; this runs whenever the file is executed.
cli()
|
||||
26
ethereumetl/__main__.py
Normal file
26
ethereumetl/__main__.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from ethereumetl.cli import cli
|
||||
|
||||
# Invoke the command group imported above (package entry for `python -m ethereumetl`).
cli()
|
||||
81
ethereumetl/cli/__init__.py
Normal file
81
ethereumetl/cli/__init__.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
logging_basic_config()
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.cli.export_all import export_all
|
||||
from ethereumetl.cli.export_blocks_and_transactions import export_blocks_and_transactions
|
||||
from ethereumetl.cli.export_contracts import export_contracts
|
||||
from ethereumetl.cli.export_geth_traces import export_geth_traces
|
||||
from ethereumetl.cli.export_origin import export_origin
|
||||
from ethereumetl.cli.export_receipts_and_logs import export_receipts_and_logs
|
||||
from ethereumetl.cli.export_token_transfers import export_token_transfers
|
||||
from ethereumetl.cli.export_tokens import export_tokens
|
||||
from ethereumetl.cli.export_traces import export_traces
|
||||
from ethereumetl.cli.extract_contracts import extract_contracts
|
||||
from ethereumetl.cli.extract_csv_column import extract_csv_column
|
||||
from ethereumetl.cli.extract_field import extract_field
|
||||
from ethereumetl.cli.extract_geth_traces import extract_geth_traces
|
||||
from ethereumetl.cli.extract_token_transfers import extract_token_transfers
|
||||
from ethereumetl.cli.extract_tokens import extract_tokens
|
||||
from ethereumetl.cli.filter_items import filter_items
|
||||
from ethereumetl.cli.get_block_range_for_date import get_block_range_for_date
|
||||
from ethereumetl.cli.get_block_range_for_timestamps import get_block_range_for_timestamps
|
||||
from ethereumetl.cli.get_keccak_hash import get_keccak_hash
|
||||
from ethereumetl.cli.stream import stream
|
||||
|
||||
|
||||
# Root click command group. It does no work itself; the subcommands are
# attached to it further down with cli.add_command(). No docstring is used
# here on purpose: click would show it as the group's help text.
@click.group()
@click.version_option(version='2.4.2')
@click.pass_context
def cli(ctx):
    pass
|
||||
|
||||
|
||||
# Each subcommand is registered on the group under an explicit name, in the
# order listed here.
_COMMANDS = (
    # export
    (export_all, "export_all"),
    (export_blocks_and_transactions, "export_blocks_and_transactions"),
    (export_origin, "export_origin"),
    (export_receipts_and_logs, "export_receipts_and_logs"),
    (export_token_transfers, "export_token_transfers"),
    (extract_token_transfers, "extract_token_transfers"),
    (export_contracts, "export_contracts"),
    (export_tokens, "export_tokens"),
    (export_traces, "export_traces"),
    (export_geth_traces, "export_geth_traces"),
    (extract_geth_traces, "extract_geth_traces"),
    (extract_contracts, "extract_contracts"),
    (extract_tokens, "extract_tokens"),
    # streaming
    (stream, "stream"),
    # utils
    (get_block_range_for_date, "get_block_range_for_date"),
    (get_block_range_for_timestamps, "get_block_range_for_timestamps"),
    (get_keccak_hash, "get_keccak_hash"),
    (extract_csv_column, "extract_csv_column"),
    (filter_items, "filter_items"),
    (extract_field, "extract_field"),
)

for _command, _command_name in _COMMANDS:
    cli.add_command(_command, _command_name)
|
||||
124
ethereumetl/cli/export_all.py
Normal file
124
ethereumetl/cli/export_all.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
import re
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from ethereumetl.jobs.export_all_common import export_all_common
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
def is_date_range(start, end):
    """Return True when both bounds look like YYYY-MM-DD dates starting with '2'.

    Only the textual shape is checked; month and day values are not validated.
    """
    date_pattern = '^2[0-9]{3}-[0-9]{2}-[0-9]{2}$'
    for bound in (start, end):
        if not re.match(date_pattern, bound):
            return False
    return True
|
||||
|
||||
|
||||
def is_unix_time_range(start, end):
    """Return True when both bounds are 10-digit or 13-digit decimal strings.

    Each bound is checked on its own, so one may have 10 digits and the
    other 13.
    """
    unix_time_pattern = "^[0-9]{10}$|^[0-9]{13}$"
    return all(re.match(unix_time_pattern, bound) is not None for bound in (start, end))
|
||||
|
||||
|
||||
def is_block_range(start, end):
    """Return True when both bounds are block numbers between 0 and 99999999.

    Args:
        start: start bound as a string.
        end: end bound as a string.

    Returns:
        bool: True only if each bound consists solely of decimal digits and
        its integer value is at most 99999999.
    """
    max_block = 99999999

    def _is_block_number(value):
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() then rejects with ValueError.
        return value.isdecimal() and 0 <= int(value) <= max_block

    return _is_block_number(start) and _is_block_number(end)
|
||||
|
||||
|
||||
def _unix_time_to_utc_date(value):
    """Convert a 10-digit (seconds) or 13-digit (milliseconds) timestamp string to a UTC date."""
    timestamp = int(value)
    if len(value) == 13:
        timestamp = timestamp / 1e3
    return datetime.utcfromtimestamp(timestamp).date()


def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type.

    Args:
        start: start bound: block number, YYYY-MM-DD date or Unix time, as a string.
        end: end bound in one of the same formats.
        partition_batch_size: number of blocks per partition; only used for
            block-number ranges.
        provider_uri: web3 provider URI; only used for date/time ranges, to
            look up the block range of each day.

    Yields:
        (batch_start_block, batch_end_block, partition_dir) tuples. Date and
        time ranges produce one partition per day with a '/date=<date>/'
        directory; block ranges produce
        '/start_block=<padded>/end_block=<padded>' directories.

    Raises:
        ValueError: if the bounds are neither block numbers, dates nor Unix times.
    """
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()
        else:
            # Each bound is converted according to its own length, so a
            # seconds bound combined with a milliseconds bound also works
            # instead of leaving start_date/end_date unassigned.
            start_date = _unix_time_to_utc_date(start)
            end_date = _unix_time_to_utc_date(end)

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = build_web3(provider)
        eth_service = EthService(web3)

        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(start_date)
            partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1, partition_batch_size):
            # the last partition is truncated at end_block
            batch_end_block = min(batch_start_block + partition_batch_size - 1, end_block)

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError('start and end must be either block numbers or ISO dates or Unix times')
|
||||
|
||||
|
||||
@click.command(context_settings={'help_option_names': ['-h', '--help']})
@click.option('-s', '--start', required=True, type=str,
              help='Start block/ISO date/Unix time')
@click.option('-e', '--end', required=True, type=str,
              help='End block/ISO date/Unix time')
@click.option('-b', '--partition-batch-size', default=10000, show_default=True, type=int,
              help='The number of blocks to export in partition.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc '
                   'or https://mainnet.infura.io')
@click.option('-o', '--output-dir', default='output', show_default=True, type=str,
              help='Output directory, partitioned in Hive style.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('-B', '--export-batch-size', default=100, show_default=True, type=int,
              help='The number of requests in JSON RPC batches.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str,
              help='The chain network to connect to.')
def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size,
               chain='ethereum'):
    """Exports all data for a range of blocks."""
    # The provider URI is passed through check_classic_provider_uri() and the
    # returned value is what both the partitioning and the export use.
    # NOTE(review): presumably this adjusts the URI for the given chain -- confirm.
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    # get_partitions() is a generator; nothing is computed until the export consumes it
    partitions = get_partitions(start, end, partition_batch_size, provider_uri)
    export_all_common(partitions, output_dir, provider_uri, max_workers, export_batch_size)
|
||||
66
ethereumetl/cli/export_blocks_and_transactions.py
Normal file
66
ethereumetl/cli/export_blocks_and_transactions.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
|
||||
from ethereumetl.jobs.exporters.blocks_and_transactions_item_exporter import blocks_and_transactions_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings={'help_option_names': ['-h', '--help']})
@click.option('-s', '--start-block', default=0, show_default=True, type=int,
              help='Start block')
@click.option('-e', '--end-block', required=True, type=int,
              help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to export at a time.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc '
                   'or https://mainnet.infura.io')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('--blocks-output', default=None, show_default=True, type=str,
              help='The output file for blocks. If not provided blocks will not be exported. '
                   'Use "-" for stdout')
@click.option('--transactions-output', default=None, show_default=True, type=str,
              help='The output file for transactions. If not provided transactions will not be exported. '
                   'Use "-" for stdout')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str,
              help='The chain network to connect to.')
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers, blocks_output,
                                   transactions_output, chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)

    # An output is exported only when a destination was given for it.
    export_blocks = blocks_output is not None
    export_transactions = transactions_output is not None
    if not (export_blocks or export_transactions):
        raise ValueError('Either --blocks-output or --transactions-output options must be provided')

    # The provider is built lazily by the proxy from the resolved URI.
    batch_web3_provider = ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True))
    item_exporter = blocks_and_transactions_item_exporter(blocks_output, transactions_output)

    ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=batch_web3_provider,
        max_workers=max_workers,
        item_exporter=item_exporter,
        export_blocks=export_blocks,
        export_transactions=export_transactions,
    ).run()
|
||||
60
ethereumetl/cli/export_contracts.py
Normal file
60
ethereumetl/cli/export_contracts.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_contracts_job import ExportContractsJob
|
||||
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-ca', '--contract-addresses', required=True, type=str,
              help='The file containing contract addresses, one per line.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri, chain='ethereum'):
    """Exports contracts bytecode and sighashes."""
    # Use the URI returned by check_classic_provider_uri(), as the other
    # export commands do; previously the return value was discarded, so the
    # job always ran against the URI exactly as passed in.
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        # Lazily strip each line and skip blank ones. The generator reads
        # from the open file, so the job must run inside this `with` block.
        contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                              if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)

        job.run()
|
||||
55
ethereumetl/cli/export_geth_traces.py
Normal file
55
ethereumetl/cli/export_geth_traces.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.jobs.export_geth_traces_job import ExportGethTracesJob
|
||||
from ethereumetl.jobs.exporters.geth_traces_item_exporter import geth_traces_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports geth traces for an inclusive-looking block range
# (start/end semantics are decided by ExportGethTracesJob — confirm there).
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to process at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file for geth traces. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
def export_geth_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
    """Exports traces from geth node."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   start_block / end_block -- block range handed to the job
    #   batch_size              -- forwarded to the job as its batch size
    #   output                  -- destination passed to the item exporter
    #   max_workers             -- forwarded to the job as its worker limit
    #   provider_uri            -- URI resolved lazily into a batch provider

    def build_batch_provider():
        # Deferred: the provider is only created when the proxy calls this.
        return get_provider_from_uri(provider_uri, batch=True)

    # Presumably gives every worker thread its own provider instance —
    # verify against ThreadLocalProxy.
    provider_proxy = ThreadLocalProxy(build_batch_provider)
    exporter = geth_traces_item_exporter(output)

    ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=provider_proxy,
        max_workers=max_workers,
        item_exporter=exporter,
    ).run()
|
||||
56
ethereumetl/cli/export_origin.py
Normal file
56
ethereumetl/cli/export_origin.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# A job to export data from Origin Protocol.
|
||||
#
|
||||
# Origin Protocol is an open source platform for implementing blockchain e-commerce.
|
||||
# More details at https://www.originprotocol.com
|
||||
#
|
||||
# The core of the platform is the marketplace smart contract:
|
||||
# - Address: https://etherscan.io/address/0x698ff47b84837d3971118a369c570172ee7e54c2
|
||||
# - Code: https://github.com/OriginProtocol/origin/blob/master/packages/contracts/contracts/marketplace/V01_Marketplace.sol
|
||||
#
|
||||
# Transactional data is stored on-chain, while side-metadata is stored in IPFS (https://ipfs.io).
|
||||
#
|
||||
# Given a range of block numbers, the job queries the blockchain for events emitted by the contract.
|
||||
# Every event includes a hash pointing to a marketplace listing metadata stored as a JSON file on IPFS.
|
||||
# A marketplace listing can either be a single self-contained listing, or the entry point for the entire
|
||||
# catalog of products from a shop.
|
||||
#
|
||||
# The job generates 2 data sets:
|
||||
# - Marketplace listings
|
||||
# - Shop products.
|
||||
#
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.jobs.export_origin_job import ExportOriginJob
|
||||
from ethereumetl.jobs.exporters.origin_exporter import origin_marketplace_listing_item_exporter, origin_shop_product_item_exporter
|
||||
from ethereumetl.ipfs.origin import get_origin_ipfs_client
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports Origin Protocol marketplace listings and shop products
# for a range of blocks into two separate outputs.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to filter at a time.')
@click.option('--marketplace-output', default='-', show_default=True, type=str,
              help='The output file for marketplace data. If not specified stdout is used.')
@click.option('--shop-output', default='-', show_default=True, type=str,
              help='The output file for shop data. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
              help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
def export_origin(start_block, end_block, batch_size, marketplace_output, shop_output, max_workers, provider_uri):
    """Exports Origin Protocol data."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   start_block / end_block -- block range handed to the job
    #   batch_size              -- forwarded to the job as its batch size
    #   marketplace_output      -- destination for marketplace listing items
    #   shop_output             -- destination for shop product items
    #   max_workers             -- forwarded to the job as its worker limit
    #   provider_uri            -- URI resolved lazily into a web3 instance

    # The lambda defers web3 construction until the proxy needs it.
    web3_proxy = ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri)))
    ipfs = get_origin_ipfs_client()
    listing_exporter = origin_marketplace_listing_item_exporter(marketplace_output)
    product_exporter = origin_shop_product_item_exporter(shop_output)

    ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=web3_proxy,
        ipfs_client=ipfs,
        marketplace_listing_exporter=listing_exporter,
        shop_product_exporter=product_exporter,
        max_workers=max_workers,
    ).run()
|
||||
65
ethereumetl/cli/export_receipts_and_logs.py
Normal file
65
ethereumetl/cli/export_receipts_and_logs.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
|
||||
from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports transaction receipts and/or their logs for the
# transaction hashes listed (one per line) in a file.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of receipts to export at a time.')
@click.option('-t', '--transaction-hashes', required=True, type=str,
              help='The file containing transaction hashes, one per line.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('--receipts-output', default=None, show_default=True, type=str,
              help='The output file for receipts. If not provided receipts will not be exported. Use "-" for stdout')
@click.option('--logs-output', default=None, show_default=True, type=str,
              help='The output file for receipt logs. '
                   'If not provided receipt logs will not be exported. Use "-" for stdout')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers, receipts_output, logs_output,
                             chain='ethereum'):
    """Exports receipts and logs."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   batch_size         -- forwarded to the job as its batch size
    #   transaction_hashes -- path of the file with one hash per line
    #   provider_uri       -- provider URI; may be rewritten for the chain
    #   max_workers        -- forwarded to the job as its worker limit
    #   receipts_output    -- receipts destination, None disables receipts
    #   logs_output        -- logs destination, None disables logs
    #   chain              -- chain name used to validate/adjust provider_uri
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
        # Strip each line once and drop blank ones (e.g. a trailing newline at
        # EOF) so that no empty hash is handed to the job. This matches how
        # export_contracts filters its address file.
        stripped_lines = (line.strip() for line in transaction_hashes_file)
        hashes = (transaction_hash for transaction_hash in stripped_lines if transaction_hash)

        job = ExportReceiptsJob(
            transaction_hashes_iterable=hashes,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)

        # Must run while the file is still open: the hashes generator reads
        # the file lazily.
        job.run()
|
||||
58
ethereumetl/cli/export_token_transfers.py
Normal file
58
ethereumetl/cli/export_token_transfers.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.jobs.export_token_transfers_job import ExportTokenTransfersJob
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports ERC20/ERC721 token transfers for a range of blocks,
# optionally restricted to the given token addresses.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
              help='The URI of the web3 provider e.g. file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
@click.option('-t', '--tokens', default=None, show_default=True, type=str, multiple=True,
              help='The list of token addresses to filter by.')
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   start_block / end_block -- block range handed to the job
    #   batch_size              -- forwarded to the job as its batch size
    #   output                  -- destination passed to the item exporter
    #   max_workers             -- forwarded to the job as its worker limit
    #   provider_uri            -- URI resolved lazily into a web3 instance
    #   tokens                  -- values of the repeatable -t option, passed
    #                              through to the job unchanged
    set_max_field_size_limit()

    # The lambda defers web3 construction until the proxy needs it.
    web3_proxy = ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri)))
    exporter = token_transfers_item_exporter(output)

    ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=web3_proxy,
        item_exporter=exporter,
        max_workers=max_workers,
        tokens=tokens,
    ).run()
|
||||
58
ethereumetl/cli/export_tokens.py
Normal file
58
ethereumetl/cli/export_tokens.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports ERC20/ERC721 token metadata for the token addresses
# listed (one per line) in a file.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-t', '--token-addresses', required=True, type=str,
              help='The file containing token addresses, one per line.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_tokens(token_addresses, output, max_workers, provider_uri, chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   token_addresses -- path of the file with one token address per line
    #   output          -- destination passed to the item exporter
    #   max_workers     -- forwarded to the job as its worker limit
    #   provider_uri    -- provider URI; may be rewritten for the chain
    #   chain           -- chain name used to validate/adjust provider_uri
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(token_addresses, 'r') as token_addresses_file:
        # Strip each line once and drop blank ones (e.g. a trailing newline at
        # EOF) so that no empty address is handed to the job. This matches how
        # export_contracts filters its address file.
        stripped_lines = (line.strip() for line in token_addresses_file)
        addresses = (token_address for token_address in stripped_lines if token_address)

        job = ExportTokensJob(
            token_addresses_iterable=addresses,
            web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)

        # Must run while the file is still open: the addresses generator reads
        # the file lazily.
        job.run()
|
||||
66
ethereumetl/cli/export_traces.py
Normal file
66
ethereumetl/cli/export_traces.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from ethereumetl.jobs.export_traces_job import ExportTracesJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: exports traces for a range of blocks, optionally including the
# special genesis and DAO-fork traces.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-s', '--start-block', default=0, show_default=True, type=int, help='Start block')
@click.option('-e', '--end-block', required=True, type=int, help='End block')
@click.option('-b', '--batch-size', default=5, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
@click.option('-p', '--provider-uri', required=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/.local/share/io.parity.ethereum/jsonrpc.ipc or http://localhost:8545/')
@click.option('--genesis-traces/--no-genesis-traces', default=False, show_default=True, help='Whether to include genesis traces')
@click.option('--daofork-traces/--no-daofork-traces', default=False, show_default=True, help='Whether to include daofork traces')
@click.option('-t', '--timeout', default=60, show_default=True, type=int, help='IPC or HTTP request timeout.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str, help='The chain network to connect to.')
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
                  genesis_traces, daofork_traces, timeout=60, chain='ethereum'):
    """Exports traces from parity node."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   start_block / end_block -- block range handed to the job
    #   batch_size              -- forwarded to the job as its batch size
    #   output                  -- destination passed to the item exporter
    #   max_workers             -- forwarded to the job as its worker limit
    #   provider_uri            -- URI resolved lazily into a web3 instance
    #   genesis_traces          -- forwarded as include_genesis_traces
    #   daofork_traces          -- forwarded as include_daofork_traces
    #   timeout                 -- request timeout passed to the provider factory
    #   chain                   -- only used here to reject an invalid combination
    #
    # Raises ValueError when daofork traces are requested for the classic chain.

    # Plain truthiness test instead of comparing against the True singleton.
    if chain == 'classic' and daofork_traces:
        raise ValueError(
            'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.')

    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        # The lambda defers web3 construction until the proxy needs it.
        web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri, timeout=timeout))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)

    job.run()
|
||||
58
ethereumetl/cli/extract_contracts.py
Normal file
58
ethereumetl/cli/extract_contracts.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
import click
|
||||
from blockchainetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_exporter
|
||||
from ethereumetl.jobs.extract_contracts_job import ExtractContractsJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: extracts contracts from a previously exported traces file
# (newline-delimited JSON when the path ends in ".json", CSV otherwise).
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-t', '--traces', type=str, required=True, help='The CSV file containing traces.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
def extract_contracts(traces, batch_size, output, max_workers):
    """Extracts contracts from traces file."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim; further documentation lives in comments.
    #
    # Parameters mirror the click options:
    #   traces      -- path of the traces file to read
    #   batch_size  -- forwarded to the job as its batch size
    #   output      -- destination passed to the item exporter
    #   max_workers -- forwarded to the job as its worker limit
    set_max_field_size_limit()

    with smart_open(traces, 'r') as traces_file:
        # The file extension alone selects the parser; both variants read the
        # file lazily, one record per line/row.
        records = (
            (json.loads(line) for line in traces_file)
            if traces.endswith('.json')
            else csv.DictReader(traces_file)
        )
        exporter = contracts_item_exporter(output)

        # Runs inside the with-block because the records are still being read
        # from the open file.
        ExtractContractsJob(
            traces_iterable=records,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=exporter,
        ).run()
|
||||
@@ -21,22 +21,22 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import argparse
|
||||
import click
|
||||
import csv
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
|
||||
parser = argparse.ArgumentParser(description='Extracts a single column from a given csv file.')
|
||||
parser.add_argument('-i', '--input', default='-', type=str, help='The input file. If not specified stdin is used.')
|
||||
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
parser.add_argument('-c', '--column', required=True, type=str, help='The csv column name to extract.')
|
||||
|
||||
args = parser.parse_args()
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input', default='-', show_default=True, type=str, help='The input file. If not specified stdin is used.')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.option('-c', '--column', required=True, type=str, help='The csv column name to extract.')
|
||||
def extract_csv_column(input, output, column):
|
||||
"""Extracts column from given CSV file. Deprecated - use extract_field."""
|
||||
set_max_field_size_limit()
|
||||
|
||||
set_max_field_size_limit()
|
||||
|
||||
with smart_open(args.input, 'r') as input_file, smart_open(args.output, 'w') as output_file:
|
||||
reader = csv.DictReader(input_file)
|
||||
for row in reader:
|
||||
output_file.write(row[args.column] + '\n')
|
||||
with smart_open(input, 'r') as input_file, smart_open(output, 'w') as output_file:
|
||||
reader = csv.DictReader(input_file)
|
||||
for row in reader:
|
||||
output_file.write(row[column] + '\n')
|
||||
35
ethereumetl/cli/extract_field.py
Normal file
35
ethereumetl/cli/extract_field.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl import misc_utils
|
||||
|
||||
|
||||
# CLI command: thin wrapper that forwards its three options, unchanged and in
# order, to misc_utils.extract_field.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', default='-', show_default=True, type=str,
              help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-f', '--field', required=True, type=str,
              help='The field name to extract.')
def extract_field(input, output, field):
    """Extracts field from given CSV or JSON newline-delimited file."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim. The parameter name `input` shadows the builtin but is
    # dictated by the `--input` option name, so it stays as is.
    cli_arguments = (input, output, field)
    misc_utils.extract_field(*cli_arguments)
|
||||
53
ethereumetl/cli/extract_geth_traces.py
Normal file
53
ethereumetl/cli/extract_geth_traces.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
import csv
|
||||
import json
|
||||
|
||||
import click
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.exporters.traces_item_exporter import traces_item_exporter
|
||||
from ethereumetl.jobs.extract_geth_traces_job import ExtractGethTracesJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
# CLI command: converts previously exported geth traces (newline-delimited
# JSON when the path ends in ".json", CSV otherwise) into trace items.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', required=True, type=str, help='The JSON file containing geth traces.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int, help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The maximum number of workers.')
def extract_geth_traces(input, batch_size, output, max_workers):
    """Extracts geth traces from JSON lines file."""
    # NOTE: click displays the docstring above as the command help, so it is
    # kept verbatim. The parameter name `input` shadows the builtin but is
    # dictated by the `--input` option name, so it stays as is.
    #
    # Parameters mirror the click options:
    #   input       -- path of the geth traces file to read
    #   batch_size  -- forwarded to the job as its batch size
    #   output      -- destination passed to the item exporter
    #   max_workers -- forwarded to the job as its worker limit
    with smart_open(input, 'r') as geth_traces_file:
        if input.endswith('.json'):
            traces_iterable = (json.loads(line) for line in geth_traces_file)
        else:
            # DictReader is already a lazy iterator over rows; wrapping it in
            # another generator added nothing. This also matches the sibling
            # extract_contracts command.
            traces_iterable = csv.DictReader(geth_traces_file)

        job = ExtractGethTracesJob(
            traces_iterable=traces_iterable,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=traces_item_exporter(output))

        # Must run while the file is still open: the traces are read lazily.
        job.run()
|
||||
59
ethereumetl/cli/extract_token_transfers.py
Normal file
59
ethereumetl/cli/extract_token_transfers.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
import csv
|
||||
import json
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.jobs.exporters.converters.int_to_string_item_converter import IntToStringItemConverter
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from ethereumetl.jobs.extract_token_transfers_job import ExtractTokenTransfersJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
set_max_field_size_limit()
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-l', '--logs', type=str, required=True,
              help='The CSV file containing receipt logs.')
@click.option('-b', '--batch-size', default=100, show_default=True, type=int,
              help='The number of blocks to filter at a time.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('--values-as-strings', default=False, show_default=True, is_flag=True,
              help='Whether to convert values to strings.')
def extract_token_transfers(logs, batch_size, output, max_workers, values_as_strings=False):
    """Extracts ERC20/ERC721 transfers from logs file."""
    # The docstring above doubles as the click help text, so it is kept verbatim.
    with smart_open(logs, 'r') as logs_file:
        # Readers are lazy: the job has to run before the file is closed.
        if logs.endswith('.json'):
            # Newline-delimited JSON, one log per line.
            log_records = (json.loads(raw_line) for raw_line in logs_file)
        else:
            log_records = csv.DictReader(logs_file)

        # Optionally export the 'value' field through IntToStringItemConverter.
        converters = []
        if values_as_strings:
            converters.append(IntToStringItemConverter(keys=['value']))

        exporter = token_transfers_item_exporter(output, converters=converters)
        job = ExtractTokenTransfersJob(
            logs_iterable=log_records,
            batch_size=batch_size,
            max_workers=max_workers,
            item_exporter=exporter)
        job.run()
|
||||
66
ethereumetl/cli/extract_tokens.py
Normal file
66
ethereumetl/cli/extract_tokens.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
import click
|
||||
from blockchainetl.csv_utils import set_max_field_size_limit
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.jobs.exporters.converters.int_to_string_item_converter import IntToStringItemConverter
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.jobs.extract_tokens_job import ExtractTokensJob
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-c', '--contracts', type=str, required=True,
              help='The JSON file containing contracts.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int,
              help='The maximum number of workers.')
@click.option('--values-as-strings', default=False, show_default=True, is_flag=True,
              help='Whether to convert values to strings.')
def extract_tokens(contracts, provider_uri, output, max_workers, values_as_strings=False):
    """Extracts tokens from contracts file."""
    # The docstring above doubles as the click help text, so it is kept verbatim.
    set_max_field_size_limit()

    def new_web3():
        # Factory handed to ThreadLocalProxy; builds a web3 client for provider_uri.
        return build_web3(get_provider_from_uri(provider_uri))

    with smart_open(contracts, 'r') as contracts_file:
        # Readers are lazy: the job has to run before the file is closed.
        if contracts.endswith('.json'):
            # Newline-delimited JSON, one contract per line.
            contract_records = (json.loads(raw_line) for raw_line in contracts_file)
        else:
            contract_records = csv.DictReader(contracts_file)

        # Optionally export the numeric token fields through IntToStringItemConverter.
        converters = []
        if values_as_strings:
            converters.append(IntToStringItemConverter(keys=['decimals', 'total_supply']))

        job = ExtractTokensJob(
            contracts_iterable=contract_records,
            web3=ThreadLocalProxy(new_web3),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))
        job.run()
|
||||
37
ethereumetl/cli/filter_items.py
Normal file
37
ethereumetl/cli/filter_items.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl import misc_utils
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--input', default='-', show_default=True, type=str,
              help='The input file. If not specified stdin is used.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-p', '--predicate', required=True, type=str,
              help='Predicate in Python code e.g. "item[\'is_erc20\']".')
def filter_items(input, output, predicate):
    """Filters rows in given CSV or JSON newline-delimited file."""
    # NOTE(review): the predicate string is executed with eval(), so it runs as
    # arbitrary Python code. Only pass predicates from a trusted source.
    def matches(item):
        # The current row is exposed to the expression under the name `item`.
        scope = {'item': item}
        return eval(predicate, globals(), scope)

    misc_utils.filter_items(input, output, matches)
|
||||
56
ethereumetl/cli/get_block_range_for_date.py
Normal file
56
ethereumetl/cli/get_block_range_for_date.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from datetime import datetime
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-d', '--date', required=True, type=lambda raw: datetime.strptime(raw, '%Y-%m-%d'),
              help='The date e.g. 2018-01-01.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str,
              help='The chain network to connect to.')
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    # The docstring above doubles as the click help text, so it is kept verbatim.
    # check_classic_provider_uri may replace the URI depending on the chain.
    resolved_uri = check_classic_provider_uri(chain, provider_uri)
    service = EthService(build_web3(get_provider_from_uri(resolved_uri)))

    first_block, last_block = service.get_block_range_for_date(date)

    # Emit a single "start,end" line.
    with smart_open(output, 'w') as sink:
        sink.write('{},{}\n'.format(first_block, last_block))
|
||||
55
ethereumetl/cli/get_block_range_for_timestamps.py
Normal file
55
ethereumetl/cli/get_block_range_for_timestamps.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import click
|
||||
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.service.eth_service import EthService
|
||||
from ethereumetl.utils import check_classic_provider_uri
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
@click.option('-s', '--start-timestamp', required=True, type=int,
              help='Start unix timestamp, in seconds.')
@click.option('-e', '--end-timestamp', required=True, type=int,
              help='End unix timestamp, in seconds.')
@click.option('-o', '--output', default='-', show_default=True, type=str,
              help='The output file. If not specified stdout is used.')
@click.option('-c', '--chain', default='ethereum', show_default=True, type=str,
              help='The chain network to connect to.')
def get_block_range_for_timestamps(provider_uri, start_timestamp, end_timestamp, output, chain='ethereum'):
    """Outputs start and end blocks for given timestamps."""
    # The docstring above doubles as the click help text, so it is kept verbatim.
    # check_classic_provider_uri may replace the URI depending on the chain.
    resolved_uri = check_classic_provider_uri(chain, provider_uri)
    service = EthService(build_web3(get_provider_from_uri(resolved_uri)))

    first_block, last_block = service.get_block_range_for_timestamps(start_timestamp, end_timestamp)

    # Emit a single "start,end" line.
    with smart_open(output, 'w') as sink:
        sink.write('{},{}\n'.format(first_block, last_block))
|
||||
@@ -21,23 +21,21 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import argparse
|
||||
import click
|
||||
|
||||
from eth_utils import keccak
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from blockchainetl.logging_utils import logging_basic_config
|
||||
|
||||
logging_basic_config()
|
||||
|
||||
parser = argparse.ArgumentParser(description='Outputs the 32-byte keccak hash of the given string.')
|
||||
parser.add_argument('-i', '--input-string', default='Transfer(address,address,uint256)', type=str,
|
||||
help='String to hash, e.g. Transfer(address,address,uint256)')
|
||||
parser.add_argument('-o', '--output', default='-', type=str, help='The output file. If not specified stdout is used.')
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
|
||||
@click.option('-i', '--input-string', default='Transfer(address,address,uint256)', show_default=True, type=str,
|
||||
help='String to hash, e.g. Transfer(address,address,uint256)')
|
||||
@click.option('-o', '--output', default='-', show_default=True, type=str, help='The output file. If not specified stdout is used.')
|
||||
def get_keccak_hash(input_string, output):
|
||||
"""Outputs 32-byte Keccak hash of given string."""
|
||||
hash = keccak(text=input_string)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
hash = keccak(text=args.input_string)
|
||||
|
||||
with smart_open(args.output, 'w') as output_file:
|
||||
output_file.write('0x{}\n'.format(hash.hex()))
|
||||
with smart_open(output, 'w') as output_file:
|
||||
output_file.write('0x{}\n'.format(hash.hex()))
|
||||
104
ethereumetl/cli/stream.py
Normal file
104
ethereumetl/cli/stream.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
import logging
|
||||
import random
|
||||
|
||||
import click
|
||||
from blockchainetl.streaming.streaming_utils import configure_signals, configure_logging
|
||||
from ethereumetl.enumeration.entity_type import EntityType
|
||||
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.streaming.item_exporter_creator import create_item_exporters
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
|
||||
|
||||
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-l', '--last-synced-block-file', default='last_synced_block.txt', show_default=True, type=str, help='')
@click.option('--lag', default=0, show_default=True, type=int, help='The number of blocks to lag behind the network.')
@click.option('-p', '--provider-uri', default='https://mainnet.infura.io', show_default=True, type=str,
              help='The URI of the web3 provider e.g. '
                   'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io')
# Adjacent string literals are concatenated verbatim, so every alternative
# below must end with its own separator to render correctly in --help.
@click.option('-o', '--output', type=str,
              help='Either Google PubSub topic path e.g. projects/your-project/topics/crypto_ethereum; '
                   'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum; '
                   'or GCS bucket e.g. gs://your-bucket-name; '
                   'or kafka, output name and connection host:port e.g. kafka/127.0.0.1:9092; '
                   'or Kinesis, e.g. kinesis://your-data-stream-name. '
                   'If not specified will print to console')
@click.option('-s', '--start-block', default=None, show_default=True, type=int, help='Start block')
@click.option('-e', '--entity-types', default=','.join(EntityType.ALL_FOR_INFURA), show_default=True, type=str,
              help='The list of entity types to export.')
@click.option('--period-seconds', default=10, show_default=True, type=int, help='How many seconds to sleep between syncs')
@click.option('-b', '--batch-size', default=10, show_default=True, type=int, help='How many blocks to batch in single request')
@click.option('-B', '--block-batch-size', default=1, show_default=True, type=int, help='How many blocks to batch in single sync round')
@click.option('-w', '--max-workers', default=5, show_default=True, type=int, help='The number of workers')
@click.option('--log-file', default=None, show_default=True, type=str, help='Log file')
@click.option('--pid-file', default=None, show_default=True, type=str, help='pid file')
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    # The docstring above doubles as the click help text.
    #
    # NOTE(review): the Python-level defaults for batch_size (2) and
    # block_batch_size (10) differ from the click option defaults (10 and 1).
    # When run through click the option defaults win; the signature defaults
    # are left untouched so direct callers keep their current behaviour.
    configure_logging(log_file)
    configure_signals()
    # Validates the comma-separated list; raises click.BadOptionUsage on unknown types.
    entity_types = parse_entity_types(entity_types)

    # Imported inside the command rather than at module level
    # (presumably to defer heavier imports until streaming is used - confirm).
    from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    # TODO: Implement fallback mechanism for provider uris instead of picking randomly
    provider_uri = pick_random_provider_uri(provider_uri)
    logging.info('Using ' + provider_uri)

    streamer_adapter = EthStreamerAdapter(
        # ThreadLocalProxy receives a factory, so the provider is created lazily by the proxy.
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=create_item_exporters(output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types
    )
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file
    )
    streamer.stream()
|
||||
|
||||
|
||||
def parse_entity_types(entity_types):
    """Split a comma-separated entity type string and validate every entry.

    Args:
        entity_types: comma-separated entity type names; whitespace around
            each name is stripped.

    Returns:
        The list of stripped entity type names, in the order given.

    Raises:
        click.BadOptionUsage: if any name is not in
            EntityType.ALL_FOR_STREAMING.
    """
    entity_types = [c.strip() for c in entity_types.split(',')]

    # validate passed types
    for entity_type in entity_types:
        if entity_type not in EntityType.ALL_FOR_STREAMING:
            # The option name must match the declared CLI flag (--entity-types)
            # so the error message points the user at the right option.
            raise click.BadOptionUsage(
                '--entity-types', '{} is not an available entity type. Supply a comma separated list of types from {}'
                    .format(entity_type, ','.join(EntityType.ALL_FOR_STREAMING)))

    return entity_types
|
||||
|
||||
|
||||
def pick_random_provider_uri(provider_uri):
    """Pick one provider URI at random from a comma-separated list.

    Args:
        provider_uri: one URI, or several separated by commas; whitespace
            around each entry is stripped.

    Returns:
        One of the non-empty entries, chosen with random.choice. If every
        entry is blank (e.g. an empty string), an empty string is returned.
    """
    provider_uris = [uri.strip() for uri in provider_uri.split(',')]
    # Blank entries come from trailing or doubled commas ("a,b," or "a,,b");
    # never hand one of those out as a provider when a real URI is available.
    candidates = [uri for uri in provider_uris if uri] or provider_uris
    return random.choice(candidates)
|
||||
@@ -40,6 +40,12 @@ class EthBlock(object):
|
||||
self.gas_limit = None
|
||||
self.gas_used = None
|
||||
self.timestamp = None
|
||||
self.withdrawals_root = None
|
||||
|
||||
self.transactions = []
|
||||
self.transaction_count = 0
|
||||
self.base_fee_per_gas = 0
|
||||
self.withdrawals = []
|
||||
|
||||
self.blob_gas_used = None
|
||||
self.excess_blob_gas = None
|
||||
|
||||
@@ -28,3 +28,4 @@ class EthContract(object):
|
||||
self.function_sighashes = []
|
||||
self.is_erc20 = False
|
||||
self.is_erc721 = False
|
||||
self.block_number = None
|
||||
|
||||
27
ethereumetl/domain/geth_trace.py
Normal file
27
ethereumetl/domain/geth_trace.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class EthGethTrace(object):
    """Plain attribute holder for geth trace data; all fields start as None."""

    def __init__(self):
        # Both attributes are unset until populated by the caller.
        self.block_number = self.transaction_traces = None
|
||||
32
ethereumetl/domain/origin.py
Normal file
32
ethereumetl/domain/origin.py
Normal file
@@ -0,0 +1,32 @@
|
||||
class OriginMarketplaceListing(object):
    """Plain attribute holder for an Origin marketplace listing.

    Every field is initialised to None and is expected to be filled in by
    whoever constructs the object.
    """

    def __init__(self):
        self.listing_id = self.ipfs_hash = None
        self.listing_type = self.category = self.subcategory = None
        self.language = self.title = self.description = None
        self.price = self.currency = None
        self.block_number = self.log_index = None
|
||||
|
||||
class OriginShopProduct(object):
    """Plain attribute holder for an Origin shop product.

    Every field is initialised to None and is expected to be filled in by
    whoever constructs the object.
    """

    def __init__(self):
        self.listing_id = self.product_id = self.ipfs_path = None
        self.external_id = self.parent_external_id = None
        self.title = self.description = None
        self.price = self.currency = None
        self.image = None
        self.option1 = self.option2 = self.option3 = None
        self.block_number = self.log_index = None
|
||||
@@ -33,3 +33,10 @@ class EthReceipt(object):
|
||||
self.logs = []
|
||||
self.root = None
|
||||
self.status = None
|
||||
self.effective_gas_price = None
|
||||
self.l1_fee = None
|
||||
self.l1_gas_used = None
|
||||
self.l1_gas_price = None
|
||||
self.l1_fee_scalar = None
|
||||
self.blob_gas_price = None
|
||||
self.blob_gas_used = None
|
||||
|
||||
@@ -28,3 +28,4 @@ class EthToken(object):
|
||||
self.name = None
|
||||
self.decimals = None
|
||||
self.total_supply = None
|
||||
self.block_number = None
|
||||
|
||||
44
ethereumetl/domain/trace.py
Normal file
44
ethereumetl/domain/trace.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class EthTrace(object):
    """Plain attribute holder for a single trace.

    All fields start as None except ``subtraces``, which starts at 0.
    """

    def __init__(self):
        self.block_number = self.transaction_hash = self.transaction_index = None
        self.from_address = self.to_address = self.value = None
        self.input = self.output = None
        self.trace_type = self.call_type = self.reward_type = None
        self.gas = self.gas_used = None
        # The only field with a non-None initial value.
        self.subtraces = 0
        self.trace_address = self.error = self.status = None
        self.trace_id = self.trace_index = None
|
||||
@@ -34,3 +34,8 @@ class EthTransaction(object):
|
||||
self.gas = None
|
||||
self.gas_price = None
|
||||
self.input = None
|
||||
self.max_fee_per_gas = None
|
||||
self.max_priority_fee_per_gas = None
|
||||
self.transaction_type = None
|
||||
self.max_fee_per_blob_gas = None
|
||||
self.blob_versioned_hashes = []
|
||||
|
||||
0
ethereumetl/enumeration/__init__.py
Normal file
0
ethereumetl/enumeration/__init__.py
Normal file
12
ethereumetl/enumeration/entity_type.py
Normal file
12
ethereumetl/enumeration/entity_type.py
Normal file
@@ -0,0 +1,12 @@
|
||||
class EntityType:
    """String constants naming entity kinds, plus two predefined selections."""

    BLOCK = 'block'
    TRANSACTION = 'transaction'
    RECEIPT = 'receipt'
    LOG = 'log'
    TOKEN_TRANSFER = 'token_transfer'
    TRACE = 'trace'
    CONTRACT = 'contract'
    TOKEN = 'token'

    # Types accepted for streaming. RECEIPT is not part of this list.
    ALL_FOR_STREAMING = [
        BLOCK,
        TRANSACTION,
        LOG,
        TOKEN_TRANSFER,
        TRACE,
        CONTRACT,
        TOKEN,
    ]
    # Smaller selection: no TRACE, CONTRACT or TOKEN.
    ALL_FOR_INFURA = [
        BLOCK,
        TRANSACTION,
        LOG,
        TOKEN_TRANSFER,
    ]
|
||||
@@ -239,6 +239,109 @@ ERC20_ABI = json.loads('''
|
||||
],
|
||||
"name": "Approval",
|
||||
"type": "event"
|
||||
},
|
||||
{
|
||||
"constant": true,
|
||||
"inputs": [],
|
||||
"name": "NAME",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "",
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"payable": false,
|
||||
"stateMutability": "view",
|
||||
"type": "function"
|
||||
},
|
||||
{
|
||||
"constant": true,
|
||||
"inputs": [],
|
||||
"name": "SYMBOL",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "",
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"payable": false,
|
||||
"stateMutability": "view",
|
||||
"type": "function"
|
||||
},
|
||||
{
|
||||
"constant": true,
|
||||
"inputs": [],
|
||||
"name": "DECIMALS",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "",
|
||||
"type": "uint8"
|
||||
}
|
||||
],
|
||||
"payable": false,
|
||||
"stateMutability": "view",
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
''')
|
||||
|
||||
# ABI fragment declaring symbol/SYMBOL/name/NAME as zero-argument view
# functions whose single output is bytes32.
# NOTE(review): presumably a fallback for tokens that return bytes32 rather
# than string for these getters - confirm against the code that uses it.
ERC20_ABI_ALTERNATIVE_1 = json.loads('''
[
  {
    "constant": true,
    "inputs": [],
    "name": "symbol",
    "outputs": [{"name": "", "type": "bytes32"}],
    "payable": false,
    "stateMutability": "view",
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "SYMBOL",
    "outputs": [{"name": "", "type": "bytes32"}],
    "payable": false,
    "stateMutability": "view",
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "name",
    "outputs": [{"name": "", "type": "bytes32"}],
    "payable": false,
    "stateMutability": "view",
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "NAME",
    "outputs": [{"name": "", "type": "bytes32"}],
    "payable": false,
    "stateMutability": "view",
    "type": "function"
  }
]
''')
|
||||
23
ethereumetl/executors/__init__.py
Normal file
23
ethereumetl/executors/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
@@ -20,47 +20,93 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from requests.exceptions import Timeout as RequestsTimeout, HTTPError, TooManyRedirects
|
||||
from web3.utils.threads import Timeout as Web3Timeout
|
||||
from web3._utils.threads import Timeout as Web3Timeout
|
||||
|
||||
from ethereumetl.executors.bounded_executor import BoundedExecutor
|
||||
from ethereumetl.executors.fail_safe_executor import FailSafeExecutor
|
||||
from ethereumetl.misc.retriable_value_error import RetriableValueError
|
||||
from ethereumetl.progress_logger import ProgressLogger
|
||||
from ethereumetl.utils import dynamic_batch_iterator
|
||||
|
||||
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError)
|
||||
RETRY_EXCEPTIONS = (ConnectionError, HTTPError, RequestsTimeout, TooManyRedirects, Web3Timeout, OSError,
|
||||
RetriableValueError)
|
||||
|
||||
BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS = 2 * 60
|
||||
|
||||
|
||||
# Executes the given work in batches, reducing the batch size exponentially in case of errors.
|
||||
class BatchWorkExecutor:
|
||||
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS):
|
||||
def __init__(self, starting_batch_size, max_workers, retry_exceptions=RETRY_EXCEPTIONS, max_retries=5):
|
||||
self.batch_size = starting_batch_size
|
||||
self.max_batch_size = starting_batch_size
|
||||
self.latest_batch_size_change_time = None
|
||||
self.max_workers = max_workers
|
||||
# Using bounded executor prevents unlimited queue growth
|
||||
# and allows monitoring in-progress futures and failing fast in case of errors.
|
||||
self.executor = FailSafeExecutor(BoundedExecutor(1, self.max_workers))
|
||||
self.retry_exceptions = retry_exceptions
|
||||
self.max_retries = max_retries
|
||||
self.progress_logger = ProgressLogger()
|
||||
self.logger = logging.getLogger('BatchWorkExecutor')
|
||||
|
||||
def execute(self, work_iterable, work_handler, total_items=None):
|
||||
self.progress_logger.start(total_items=total_items)
|
||||
for batch in dynamic_batch_iterator(work_iterable, lambda: self.batch_size):
|
||||
self.executor.submit(self._fail_safe_execute, work_handler, batch)
|
||||
|
||||
# Check race conditions
|
||||
def _fail_safe_execute(self, work_handler, batch):
|
||||
try:
|
||||
work_handler(batch)
|
||||
self._try_increase_batch_size(len(batch))
|
||||
except self.retry_exceptions:
|
||||
batch_size = self.batch_size
|
||||
# Reduce the batch size. Subsequent batches will be 2 times smaller
|
||||
if batch_size == len(batch) and batch_size > 1:
|
||||
self.batch_size = int(batch_size / 2)
|
||||
# For the failed batch try handling items one by one
|
||||
self.logger.exception('An exception occurred while executing work_handler.')
|
||||
self._try_decrease_batch_size(len(batch))
|
||||
self.logger.info('The batch of size {} will be retried one item at a time.'.format(len(batch)))
|
||||
for item in batch:
|
||||
work_handler([item])
|
||||
execute_with_retries(work_handler, [item],
|
||||
max_retries=self.max_retries, retry_exceptions=self.retry_exceptions)
|
||||
|
||||
self.progress_logger.track(len(batch))
|
||||
|
||||
# Some acceptable race conditions are possible
|
||||
def _try_decrease_batch_size(self, current_batch_size):
|
||||
batch_size = self.batch_size
|
||||
if batch_size == current_batch_size and batch_size > 1:
|
||||
new_batch_size = int(current_batch_size / 2)
|
||||
self.logger.info('Reducing batch size to {}.'.format(new_batch_size))
|
||||
self.batch_size = new_batch_size
|
||||
self.latest_batch_size_change_time = time.time()
|
||||
|
||||
def _try_increase_batch_size(self, current_batch_size):
|
||||
if current_batch_size * 2 <= self.max_batch_size:
|
||||
current_time = time.time()
|
||||
latest_batch_size_change_time = self.latest_batch_size_change_time
|
||||
seconds_since_last_change = current_time - latest_batch_size_change_time \
|
||||
if latest_batch_size_change_time is not None else 0
|
||||
if seconds_since_last_change > BATCH_CHANGE_COOLDOWN_PERIOD_SECONDS:
|
||||
new_batch_size = current_batch_size * 2
|
||||
self.logger.info('Increasing batch size to {}.'.format(new_batch_size))
|
||||
self.batch_size = new_batch_size
|
||||
self.latest_batch_size_change_time = current_time
|
||||
|
||||
def shutdown(self):
|
||||
self.executor.shutdown()
|
||||
self.progress_logger.finish()
|
||||
|
||||
|
||||
def execute_with_retries(func, *args, max_retries=5, retry_exceptions=RETRY_EXCEPTIONS, sleep_seconds=1):
    """Call ``func(*args)``, retrying while it raises a retriable exception.

    :param func: callable to invoke.
    :param args: positional arguments passed to ``func`` on every attempt.
    :param max_retries: total number of attempts. Values below 1 are treated
        as 1 so that ``func`` is always called at least once.
    :param retry_exceptions: exception class (or tuple of classes) that
        triggers another attempt; any other exception propagates immediately.
    :param sleep_seconds: pause between two consecutive attempts.
    :return: the value returned by the first successful call to ``func``.
    :raises: the last retriable exception once every attempt has failed.
    """
    # range(0) would skip the loop entirely: func would never run and None
    # would be returned, silently dropping the work. Always try at least once.
    attempts = max(1, max_retries)
    last_attempt = attempts - 1

    for i in range(attempts):
        try:
            return func(*args)
        except retry_exceptions:
            logging.exception('An exception occurred while executing execute_with_retries. Retry #{}'.format(i))
            if i >= last_attempt:
                # Out of attempts: surface the original exception to the caller.
                raise
            logging.info('The request will be retried after {} seconds. Retry #{}'.format(sleep_seconds, i))
            time.sleep(sleep_seconds)
|
||||
|
||||
@@ -44,7 +44,7 @@ class BaseItemExporter(object):
|
||||
self._configure(kwargs)
|
||||
|
||||
def _configure(self, options, dont_fail=False):
|
||||
"""Configure the exporter by poping options from the ``options`` dict.
|
||||
"""Configure the exporter by popping options from the ``options`` dict.
|
||||
If dont_fail is set, it won't raise an exception on unexpected options
|
||||
(useful for using with keyword arguments in subclasses constructors)
|
||||
"""
|
||||
@@ -120,7 +120,7 @@ class CsvItemExporter(BaseItemExporter):
|
||||
def _join_if_needed(self, value):
|
||||
if isinstance(value, (list, tuple)):
|
||||
try:
|
||||
return self._join_multivalued.join(value)
|
||||
return self._join_multivalued.join(str(x) for x in value)
|
||||
except TypeError: # list in value may not contain strings
|
||||
pass
|
||||
return value
|
||||
|
||||
0
ethereumetl/ipfs/__init__.py
Normal file
0
ethereumetl/ipfs/__init__.py
Normal file
31
ethereumetl/ipfs/client.py
Normal file
31
ethereumetl/ipfs/client.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import logging
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger('ipfs')
|
||||
|
||||
IPFS_TIMEOUT = 5  # Timeout in seconds
|
||||
IPFS_NUM_ATTEMPTS = 3
|
||||
|
||||
# A simple client to fetch content from IPFS gateways.
|
||||
class IpfsClient:
    """Simple client that downloads content from a list of IPFS HTTP gateways."""

    def __init__(self, gatewayUrls):
        # Base URLs of the gateways; _get cycles through them in this order.
        self._gatewayUrls = gatewayUrls

    def _get(self, path, json):
        """Download ``path`` from the gateways, making up to IPFS_NUM_ATTEMPTS attempts.

        :param path: IPFS path appended to the gateway base URL.
        :param json: when truthy the response body is parsed as JSON,
            otherwise the response text is returned.
        :return: the parsed JSON document or the response text.
        :raises Exception: when no gateway is configured or every attempt failed.
        """
        if not self._gatewayUrls:
            # Without this guard the round-robin index below divides by zero
            # and the caller gets an unrelated ZeroDivisionError.
            raise Exception("IPFS download failure for hash {}: no gateways configured".format(path))

        last_error = None
        for i in range(IPFS_NUM_ATTEMPTS):
            # Round-robin thru the gateways.
            gatewayUrl = self._gatewayUrls[i % len(self._gatewayUrls)]
            try:
                url = "{}/{}".format(gatewayUrl, path)
                r = requests.get(url, timeout=IPFS_TIMEOUT)
                r.raise_for_status()
                return r.json() if json else r.text
            except Exception as e:
                # Deliberately broad: any failure (network, HTTP status, invalid
                # JSON) just moves on to the next attempt.
                last_error = e
                logger.error("Attempt #{} - Failed downloading {}: {}".format(i + 1, path, e))

        # Keep the last underlying error attached as the cause for debugging.
        raise Exception("IPFS download failure for hash {}".format(path)) from last_error

    def get(self, path):
        """Return the content at ``path`` as text."""
        return self._get(path, False)

    def get_json(self, path):
        """Return the content at ``path`` parsed as JSON."""
        return self._get(path, True)
|
||||
139
ethereumetl/ipfs/origin.py
Normal file
139
ethereumetl/ipfs/origin.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ethereumetl.domain.origin import OriginMarketplaceListing, OriginShopProduct
|
||||
from ethereumetl.ipfs.client import IpfsClient
|
||||
|
||||
logger = logging.getLogger('origin')
|
||||
|
||||
IPFS_PRIMARY_GATEWAY_URL = 'https://cf-ipfs.com/ipfs'
|
||||
IPFS_SECONDARY_GATEWAY_URL = 'https://gateway.ipfs.io/ipfs'
|
||||
|
||||
# Returns an IPFS client that can be used to fetch Origin Protocol's data.
|
||||
def get_origin_ipfs_client():
    """Build the IpfsClient for Origin Protocol data: primary gateway first, then the secondary one."""
    gateway_urls = [
        IPFS_PRIMARY_GATEWAY_URL,
        IPFS_SECONDARY_GATEWAY_URL,
    ]
    return IpfsClient(gateway_urls)
|
||||
|
||||
|
||||
# Parses the shop's HTML index page to extract the name of the IPFS directory under
|
||||
# which all the shop data is located.
|
||||
def _get_shop_data_dir(shop_index_page):
|
||||
match = re.search('<link rel="data-dir" href="(.+?)"', shop_index_page)
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
# Returns the list of products from an Origin Protocol shop.
|
||||
def _origin_external_id(item):
    """Return the item's ``externalId`` as a string, or None when it is missing or falsy."""
    external_id = item.get('externalId')
    return str(external_id) if external_id else None


def _new_origin_shop_product(receipt_log, listing_id, ipfs_path, item_id, product, variant=None):
    """Build one OriginShopProduct for a product or, when ``variant`` is given, for that variant.

    Description and currency always come from the parent product; title,
    price, image and external id come from the variant when there is one.
    """
    is_variant = variant is not None
    item = variant if is_variant else product

    result = OriginShopProduct()
    result.block_number = receipt_log.block_number
    result.log_index = receipt_log.log_index
    result.listing_id = listing_id
    result.product_id = "{}-{}".format(listing_id, item_id)
    result.ipfs_path = ipfs_path
    result.external_id = _origin_external_id(item)
    result.parent_external_id = _origin_external_id(product) if is_variant else None
    result.title = item.get('title')
    result.description = product.get('description')
    result.price = item.get('price')
    result.currency = product.get('currency', 'fiat-USD')
    result.option1 = variant.get('option1') if is_variant else None
    result.option2 = variant.get('option2') if is_variant else None
    result.option3 = variant.get('option3') if is_variant else None
    result.image = item.get('image')
    return result


def _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash):
    """Return the list of products (and their variants) of an Origin Protocol shop.

    :param receipt_log: log entry whose ``block_number`` and ``log_index`` are
        copied onto every returned product.
    :param listing_id: id of the marketplace listing the shop belongs to.
    :param ipfs_client: client providing ``get`` and ``get_json``.
    :param shop_ipfs_hash: IPFS hash of the shop's root directory.
    :return: list of OriginShopProduct; empty when ``products.json`` cannot be
        downloaded. Products whose ``data.json`` cannot be downloaded are skipped.

    A failure to download the shop's ``index.html`` is not handled here and
    propagates to the caller.
    """
    results = []
    shop_index_page = ipfs_client.get(shop_ipfs_hash + "/index.html")
    shop_data_dir = _get_shop_data_dir(shop_index_page)

    # Shop data lives in the data-dir sub-directory when the index page declares one.
    path = "{}/{}".format(shop_ipfs_hash, shop_data_dir) if shop_data_dir else shop_ipfs_hash
    logger.debug("Using shop path {}".format(path))

    products_path = "{}/{}".format(path, 'products.json')
    try:
        products = ipfs_client.get_json(products_path)
    except Exception as e:
        logger.error("Listing {} Failed downloading product {}: {}".format(listing_id, products_path, e))
        return results

    logger.info("Found {} products in for listing {}".format(len(products), listing_id))

    # Go through all the products from the shop.
    for product in products:
        product_id = product.get('id')
        if not product_id:
            logger.error('Product entry with missing id in products.json')
            continue

        logger.info("Processing product {}".format(product_id))

        # Fetch the product details to get the variants.
        product_base_path = "{}/{}".format(path, product_id)
        product_data_path = "{}/{}".format(product_base_path, 'data.json')
        try:
            product_data = ipfs_client.get_json(product_data_path)
        except Exception as e:
            logger.error("Failed downloading {}: {}".format(product_data_path, e))
            continue

        # Extract the top product.
        results.append(
            _new_origin_shop_product(receipt_log, listing_id, product_base_path, product_id, product_data))

        # Extract the variants, if any. `or []` also covers an explicit
        # "variants": null, which previously raised on len(None) and aborted
        # the extraction of the whole shop.
        variants = product_data.get('variants') or []
        if variants:
            logger.info("Found {} variants".format(len(variants)))
        for variant in variants:
            results.append(
                _new_origin_shop_product(
                    receipt_log, listing_id, product_base_path, variant.get('id'), product_data, variant))

    return results
|
||||
|
||||
# Returns a listing from the Origin Protocol marketplace.
|
||||
def get_origin_marketplace_data(receipt_log, listing_id, ipfs_client, ipfs_hash):
    """Return a listing from the Origin Protocol marketplace, plus its shop products.

    :param receipt_log: log entry whose ``block_number`` and ``log_index`` are
        copied onto the listing.
    :param listing_id: id of the marketplace listing.
    :param ipfs_client: client providing ``get_json`` (and ``get`` for shops).
    :param ipfs_hash: IPFS hash of the listing's JSON metadata.
    :return: tuple ``(listing, shop_listings)``. ``(None, [])`` when the
        listing metadata cannot be downloaded or is not a JSON object.
        ``shop_listings`` is empty when the listing has no ``shopIpfsHash`` or
        the shop extraction fails.
    """
    # Load the listing's metadata from IPFS.
    try:
        listing_data = ipfs_client.get_json(ipfs_hash)
    except Exception as e:
        logger.error("Extraction failed. Listing {} Listing hash {} - {}".format(listing_id, ipfs_hash, e))
        return None, []

    # IPFS content is external data: anything other than a JSON object would
    # make the .get() calls below raise, so treat it as a failed extraction.
    if not isinstance(listing_data, dict):
        logger.error("Extraction failed. Listing {} Listing hash {} - {}".format(
            listing_id, ipfs_hash, 'listing metadata is not a JSON object'))
        return None, []

    # A null or non-object "price" is treated the same as a missing one.
    price = listing_data.get('price')
    if not isinstance(price, dict):
        price = {}

    # Fill-in an OriginMarketplaceListing object based on the IPFS data.
    listing = OriginMarketplaceListing()
    listing.block_number = receipt_log.block_number
    listing.log_index = receipt_log.log_index
    listing.listing_id = str(listing_id)
    listing.ipfs_hash = ipfs_hash
    listing.listing_type = listing_data.get('listingType', '')
    listing.category = listing_data.get('category', '')
    listing.subcategory = listing_data.get('subCategory', '')
    listing.language = listing_data.get('language', '')
    listing.title = listing_data.get('title', '')
    listing.description = listing_data.get('description', '')
    listing.price = price.get('amount', '')
    listing.currency = price.get('currency', '')

    # If it is a shop listing, also extract all of the shop data.
    shop_listings = []
    shop_ipfs_hash = listing_data.get('shopIpfsHash')
    if shop_ipfs_hash:
        try:
            shop_listings = _get_origin_shop_products(receipt_log, listing_id, ipfs_client, shop_ipfs_hash)
        except Exception as e:
            # Best effort: the listing itself is still returned without shop products.
            logger.error("Extraction failed. Listing {} Shop hash {} - {}".format(listing_id, shop_ipfs_hash, e))

    return listing, shop_listings
|
||||
|
||||
|
||||
23
ethereumetl/jobs/__init__.py
Normal file
23
ethereumetl/jobs/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeny Medvedev, evge.medvedev@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
@@ -24,12 +24,11 @@
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from time import time
|
||||
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.csv_utils import set_max_field_size_limit
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from blockchainetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_blocks_job import ExportBlocksJob
|
||||
from ethereumetl.jobs.export_contracts_job import ExportContractsJob
|
||||
from ethereumetl.jobs.export_receipts_job import ExportReceiptsJob
|
||||
@@ -40,11 +39,10 @@ from ethereumetl.jobs.exporters.contracts_item_exporter import contracts_item_ex
|
||||
from ethereumetl.jobs.exporters.receipts_and_logs_item_exporter import receipts_and_logs_item_exporter
|
||||
from ethereumetl.jobs.exporters.token_transfers_item_exporter import token_transfers_item_exporter
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.web3_utils import build_web3
|
||||
|
||||
logging_basic_config()
|
||||
logger = logging.getLogger('export_all')
|
||||
|
||||
|
||||
@@ -65,7 +63,7 @@ def extract_csv_column_unique(input, output, column):
|
||||
output_file.write(row[column] + '\n')
|
||||
|
||||
|
||||
def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
|
||||
for batch_start_block, batch_end_block, partition_dir in partitions:
|
||||
# # # start # # #
|
||||
@@ -74,21 +72,45 @@ def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
|
||||
padded_batch_start_block = str(batch_start_block).zfill(8)
|
||||
padded_batch_end_block = str(batch_end_block).zfill(8)
|
||||
block_range = f'{padded_batch_start_block}-{padded_batch_end_block}'
|
||||
file_name_suffix = f'{padded_batch_start_block}_{padded_batch_end_block}'
|
||||
block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
|
||||
padded_batch_start_block=padded_batch_start_block,
|
||||
padded_batch_end_block=padded_batch_end_block,
|
||||
)
|
||||
file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
|
||||
padded_batch_start_block=padded_batch_start_block,
|
||||
padded_batch_end_block=padded_batch_end_block,
|
||||
)
|
||||
|
||||
# # # blocks_and_transactions # # #
|
||||
|
||||
blocks_output_dir = f'{output_dir}/blocks{partition_dir}'
|
||||
blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)
|
||||
|
||||
transactions_output_dir = f'{output_dir}/transactions{partition_dir}'
|
||||
transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)
|
||||
|
||||
blocks_file = f'{blocks_output_dir}/blocks_{file_name_suffix}.csv'
|
||||
transactions_file = f'{transactions_output_dir}/transactions_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting blocks {block_range} to {blocks_file}')
|
||||
logger.info(f'Exporting transactions from blocks {block_range} to {transactions_file}')
|
||||
blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
|
||||
blocks_output_dir=blocks_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
|
||||
transactions_output_dir=transactions_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
|
||||
block_range=block_range,
|
||||
blocks_file=blocks_file,
|
||||
))
|
||||
logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
|
||||
block_range=block_range,
|
||||
transactions_file=transactions_file,
|
||||
))
|
||||
|
||||
job = ExportBlocksJob(
|
||||
start_block=batch_start_block,
|
||||
@@ -105,39 +127,72 @@ def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
|
||||
token_transfers_file = None
|
||||
if is_log_filter_supported(provider_uri):
|
||||
token_transfers_output_dir = f'{output_dir}/token_transfers{partition_dir}'
|
||||
token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)
|
||||
|
||||
token_transfers_file = f'{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}')
|
||||
token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
|
||||
token_transfers_output_dir=token_transfers_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
|
||||
block_range=block_range,
|
||||
token_transfers_file=token_transfers_file,
|
||||
))
|
||||
|
||||
job = ExportTokenTransfersJob(
|
||||
start_block=batch_start_block,
|
||||
end_block=batch_end_block,
|
||||
batch_size=batch_size,
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
|
||||
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
|
||||
item_exporter=token_transfers_item_exporter(token_transfers_file),
|
||||
max_workers=max_workers)
|
||||
job.run()
|
||||
|
||||
# # # receipts_and_logs # # #
|
||||
|
||||
transaction_hashes_output_dir = f'{output_dir}/transaction_hashes{partition_dir}'
|
||||
os.makedirs(os.path.dirname(transaction_hashes_output_dir), exist_ok=True)
|
||||
cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)
|
||||
|
||||
transaction_hashes_file = f'{transaction_hashes_output_dir}/transaction_hashes_{file_name_suffix}.csv'
|
||||
logger.info(f'Extracting hash column from transaction file {transactions_file}')
|
||||
transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting hash column from transaction file {transactions_file}'.format(
|
||||
transactions_file=transactions_file,
|
||||
))
|
||||
extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')
|
||||
|
||||
receipts_output_dir = f'{output_dir}/receipts{partition_dir}'
|
||||
receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)
|
||||
|
||||
logs_output_dir = f'{output_dir}/logs{partition_dir}'
|
||||
logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)
|
||||
|
||||
receipts_file = f'{receipts_output_dir}/receipts_{file_name_suffix}.csv'
|
||||
logs_file = f'{logs_output_dir}/logs_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}')
|
||||
receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
|
||||
receipts_output_dir=receipts_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
|
||||
logs_output_dir=logs_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
|
||||
block_range=block_range,
|
||||
receipts_file=receipts_file,
|
||||
logs_file=logs_file,
|
||||
))
|
||||
|
||||
with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
|
||||
job = ExportReceiptsJob(
|
||||
@@ -152,18 +207,29 @@ def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
|
||||
# # # contracts # # #
|
||||
|
||||
contract_addresses_output_dir = f'{output_dir}/contract_addresses{partition_dir}'
|
||||
os.makedirs(os.path.dirname(contract_addresses_output_dir), exist_ok=True)
|
||||
|
||||
contract_addresses_file = f'{contract_addresses_output_dir}/contract_addresses_{file_name_suffix}.csv'
|
||||
logger.info(f'Extracting contract_address from receipt file {receipts_file}')
|
||||
contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
|
||||
receipts_file=receipts_file
|
||||
))
|
||||
extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')
|
||||
|
||||
contracts_output_dir = f'{output_dir}/contracts{partition_dir}'
|
||||
contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)
|
||||
|
||||
contracts_file = f'{contracts_output_dir}/contracts_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting contracts from blocks {block_range} to {contracts_file}')
|
||||
contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
|
||||
contracts_output_dir=contracts_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
|
||||
block_range=block_range,
|
||||
contracts_file=contracts_file,
|
||||
))
|
||||
|
||||
with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
|
||||
contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
|
||||
@@ -179,29 +245,43 @@ def export_all(partitions, output_dir, provider_uri, max_workers, batch_size):
|
||||
# # # tokens # # #
|
||||
|
||||
if token_transfers_file is not None:
|
||||
token_addresses_output_dir = f'{output_dir}/token_addresses{partition_dir}'
|
||||
os.makedirs(os.path.dirname(token_addresses_output_dir), exist_ok=True)
|
||||
|
||||
token_addresses_file = f'{token_addresses_output_dir}/token_addresses_{file_name_suffix}'
|
||||
logger.info(f'Extracting token_address from token_transfers file {token_transfers_file}')
|
||||
token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
|
||||
cache_output_dir=cache_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
|
||||
token_transfers_file=token_transfers_file,
|
||||
))
|
||||
extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')
|
||||
|
||||
tokens_output_dir = f'{output_dir}/tokens{partition_dir}'
|
||||
tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
|
||||
output_dir=output_dir,
|
||||
partition_dir=partition_dir,
|
||||
)
|
||||
os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)
|
||||
|
||||
tokens_file = f'{tokens_output_dir}/tokens_{file_name_suffix}.csv'
|
||||
logger.info(f'Exporting tokens from blocks {block_range} to {tokens_file}')
|
||||
tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
|
||||
tokens_output_dir=tokens_output_dir,
|
||||
file_name_suffix=file_name_suffix,
|
||||
)
|
||||
logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
|
||||
block_range=block_range,
|
||||
tokens_file=tokens_file,
|
||||
))
|
||||
|
||||
with smart_open(token_addresses_file, 'r') as token_addresses:
|
||||
job = ExportTokensJob(
|
||||
token_addresses_iterable=(token_address.strip() for token_address in token_addresses),
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
|
||||
web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
|
||||
item_exporter=tokens_item_exporter(tokens_file),
|
||||
max_workers=max_workers)
|
||||
job.run()
|
||||
|
||||
# # # finish # # #
|
||||
|
||||
shutil.rmtree(os.path.dirname(cache_output_dir))
|
||||
end_time = time()
|
||||
time_diff = round(end_time - start_time, 5)
|
||||
logger.info(f'Exporting blocks {block_range} took {time_diff} seconds')
|
||||
logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
|
||||
block_range=block_range,
|
||||
time_diff=time_diff,
|
||||
))
|
||||
@@ -24,7 +24,7 @@
|
||||
import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.json_rpc_requests import generate_get_block_by_number_json_rpc
|
||||
from ethereumetl.mappers.block_mapper import EthBlockMapper
|
||||
from ethereumetl.mappers.transaction_mapper import EthTransactionMapper
|
||||
@@ -72,7 +72,7 @@ class ExportBlocksJob(BaseJob):
|
||||
|
||||
def _export_batch(self, block_number_batch):
|
||||
blocks_rpc = list(generate_get_block_by_number_json_rpc(block_number_batch, self.export_transactions))
|
||||
response = self.batch_web3_provider.make_request(json.dumps(blocks_rpc))
|
||||
response = self.batch_web3_provider.make_batch_request(json.dumps(blocks_rpc))
|
||||
results = rpc_response_batch_to_results(response)
|
||||
blocks = [self.block_mapper.json_dict_to_block(result) for result in results]
|
||||
|
||||
|
||||
@@ -24,14 +24,15 @@
|
||||
import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.json_rpc_requests import generate_get_code_json_rpc
|
||||
from ethereumetl.mappers.contract_mapper import EthContractMapper
|
||||
|
||||
# Exports contracts bytecode
|
||||
from ethereumetl.service.eth_contract_service import EthContractService
|
||||
from ethereumetl.utils import rpc_response_to_result
|
||||
|
||||
|
||||
# Exports contracts bytecode
|
||||
class ExportContractsJob(BaseJob):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -57,13 +58,13 @@ class ExportContractsJob(BaseJob):
|
||||
|
||||
def _export_contracts(self, contract_addresses):
|
||||
contracts_code_rpc = list(generate_get_code_json_rpc(contract_addresses))
|
||||
response_batch = self.batch_web3_provider.make_request(json.dumps(contracts_code_rpc))
|
||||
response_batch = self.batch_web3_provider.make_batch_request(json.dumps(contracts_code_rpc))
|
||||
|
||||
contracts = []
|
||||
for response in response_batch:
|
||||
# request id is the index of the contract address in contract_addresses list
|
||||
request_id = response['id']
|
||||
result = response['result']
|
||||
result = rpc_response_to_result(response)
|
||||
|
||||
contract_address = contract_addresses[request_id]
|
||||
contract = self._get_contract(contract_address, result)
|
||||
|
||||
80
ethereumetl/jobs/export_geth_traces_job.py
Normal file
80
ethereumetl/jobs/export_geth_traces_job.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Evgeniy Filatov, evgeniyfilatov@gmail.com
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import json
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.json_rpc_requests import generate_trace_block_by_number_json_rpc
|
||||
from blockchainetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.geth_trace_mapper import EthGethTraceMapper
|
||||
from ethereumetl.utils import validate_range, rpc_response_to_result
|
||||
|
||||
|
||||
# Exports geth traces
|
||||
class ExportGethTracesJob(BaseJob):
    """Job that requests geth traces for a block range and exports them.

    Block numbers from ``start_block`` to ``end_block`` (inclusive) are
    handed to a ``BatchWorkExecutor``; every batch is sent as one JSON-RPC
    batch request through ``batch_web3_provider`` and each response is
    mapped and passed to ``item_exporter``.
    """

    def __init__(
            self,
            start_block,
            end_block,
            batch_size,
            batch_web3_provider,
            max_workers,
            item_exporter):
        """Validate the block range and store the collaborators.

        :param start_block: first block number to export (inclusive).
        :param end_block: last block number to export (inclusive).
        :param batch_size: passed to ``BatchWorkExecutor``.
        :param batch_web3_provider: object exposing ``make_batch_request``.
        :param max_workers: passed to ``BatchWorkExecutor``.
        :param item_exporter: object exposing ``open``, ``export_item``
            and ``close``.
        """
        validate_range(start_block, end_block)
        self.start_block = start_block
        self.end_block = end_block

        self.batch_web3_provider = batch_web3_provider

        self.batch_work_executor = BatchWorkExecutor(batch_size, max_workers)
        self.item_exporter = item_exporter

        self.geth_trace_mapper = EthGethTraceMapper()

    def _start(self):
        """Open the item exporter before any batch is processed."""
        self.item_exporter.open()

    def _export(self):
        """Run ``_export_batch`` over every block number in the range."""
        self.batch_work_executor.execute(
            range(self.start_block, self.end_block + 1),
            self._export_batch,
            total_items=self.end_block - self.start_block + 1
        )

    def _export_batch(self, block_number_batch):
        """Fetch and export the traces for one batch of block numbers.

        :param block_number_batch: iterable of block numbers to trace.
        """
        trace_block_rpc = list(generate_trace_block_by_number_json_rpc(block_number_batch))
        response = self.batch_web3_provider.make_batch_request(json.dumps(trace_block_rpc))

        for response_item in response:
            # The response id is used as the block number; presumably the
            # request generator sets id to the block number -- confirm there.
            block_number = response_item.get('id')
            result = rpc_response_to_result(response_item)

            geth_trace = self.geth_trace_mapper.json_dict_to_geth_trace({
                'block_number': block_number,
                'transaction_traces': [tx_trace.get('result') for tx_trace in result],
            })

            self.item_exporter.export_item(self.geth_trace_mapper.geth_trace_to_dict(geth_trace))

    def _end(self):
        """Shut down the executor and close the item exporter.

        The exporter is closed in a ``finally`` clause so that it is
        released even when shutting down the executor raises; the original
        exception still propagates to the caller.
        """
        try:
            self.batch_work_executor.shutdown()
        finally:
            self.item_exporter.close()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user