mirror of
https://github.com/blockchain-etl/ethereum-etl.git
synced 2026-01-09 13:57:54 -05:00
Rename erc20_token to token
This commit is contained in:
54
README.md
54
README.md
@@ -35,7 +35,7 @@ Read this article https://medium.com/@medvedev1088/exporting-and-analyzing-ether
|
||||
- [receipts.csv](#receiptscsv)
|
||||
- [logs.csv](#logscsv)
|
||||
- [contracts.csv](#contractscsv)
|
||||
- [erc20_tokens.csv](#erc20_tokenscsv)
|
||||
- [tokens.csv](#tokenscsv)
|
||||
- [Exporting the Blockchain](#exporting-the-blockchain)
|
||||
- [Export in 2 Hours](#export-in-2-hours)
|
||||
- [Command Reference](#command-reference)
|
||||
@@ -88,13 +88,13 @@ tx_input | hex_string |
|
||||
|
||||
Column | Type |
|
||||
--------------------|-------------|
|
||||
erc20_token | address |
|
||||
erc20_from | address |
|
||||
erc20_to | address |
|
||||
erc20_value | numeric |
|
||||
erc20_tx_hash | hex_string |
|
||||
erc20_log_index | bigint |
|
||||
erc20_block_number | bigint |
|
||||
token_address | address |
|
||||
from_address | address |
|
||||
to_address | address |
|
||||
value | numeric |
|
||||
tx_hash | hex_string |
|
||||
log_index | bigint |
|
||||
block_number | bigint |
|
||||
|
||||
### receipts.csv
|
||||
|
||||
@@ -133,20 +133,20 @@ contract_function_sighashes | string |
|
||||
contract_is_erc20 | boolean |
|
||||
contract_is_erc721 | boolean |
|
||||
|
||||
### erc20_tokens.csv
|
||||
### tokens.csv
|
||||
|
||||
Column | Type |
|
||||
-----------------------------|-------------|
|
||||
erc20_token_address | address |
|
||||
erc20_token_symbol | string |
|
||||
erc20_token_name | string |
|
||||
erc20_token_decimals | bigint |
|
||||
erc20_token_total_supply | numeric |
|
||||
address | address |
|
||||
symbol | string |
|
||||
name | string |
|
||||
decimals | bigint |
|
||||
total_supply | numeric |
|
||||
|
||||
You can find column descriptions in [schemas/gcp](schemas/gcp)
|
||||
|
||||
Note: `erc20_token_symbol`, `erc20_token_name`, `erc20_token_decimals`, `erc20_token_total_supply`
|
||||
columns in `erc20_tokens.csv` can have empty values in case the contract doesn't implement the corresponding methods
|
||||
Note: `symbol`, `name`, `decimals`, `total_supply`
|
||||
columns in `tokens.csv` can have empty values in case the contract doesn't implement the corresponding methods
|
||||
or implements them incorrectly (e.g. wrong return type).
|
||||
|
||||
Note: for the `address` type all hex characters are lower-cased.
|
||||
@@ -230,7 +230,7 @@ Additional steps:
|
||||
- [extract_token_transfers.py](#extract_token_transferspy)
|
||||
- [export_receipts_and_logs.py](#export_receipts_and_logspy)
|
||||
- [export_contracts.py](#export_contractspy)
|
||||
- [export_erc20_tokens.py](#export_erc20_tokenspy)
|
||||
- [export_tokens.py](#export_tokenspy)
|
||||
- [get_block_range_for_date.py](#get_block_range_for_datepy)
|
||||
|
||||
All the commands accept `-h` parameter for help, e.g.:
|
||||
@@ -332,20 +332,20 @@ Then export contracts:
|
||||
|
||||
You can tune `--batch-size`, `--max-workers` for performance.
|
||||
|
||||
##### export_erc20_tokens.py
|
||||
##### export_tokens.py
|
||||
|
||||
First extract token addresses from `token_transfers.csv`
|
||||
(exported with [export_token_transfers.py](#export_token_transferspy)):
|
||||
|
||||
```bash
|
||||
> python extract_csv_column.py -i token_transfers.csv -c erc20_token -o - | sort | uniq > erc20_token_addresses.csv
|
||||
> python extract_csv_column.py -i token_transfers.csv -c token_address -o - | sort | uniq > token_addresses.csv
|
||||
```
|
||||
|
||||
Then export ERC20 tokens:
|
||||
|
||||
```bash
|
||||
> python export_erc20_tokens.py --token-addresses erc20_token_addresses.csv \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output erc20_tokens.csv
|
||||
> python export_tokens.py --token-addresses token_addresses.csv \
|
||||
--provider-uri file://$HOME/Library/Ethereum/geth.ipc --output tokens.csv
|
||||
```
|
||||
|
||||
You can tune `--max-workers` for performance.
|
||||
@@ -353,7 +353,7 @@ You can tune `--max-workers` for performance.
|
||||
Note that there will be duplicate tokens across different partitions,
|
||||
which need to be deduplicated (see Querying in Google BigQuery section).
|
||||
|
||||
Upvote this pull request to make erc20_tokens export faster
|
||||
Upvote this pull request to make tokens export faster
|
||||
https://github.com/ethereum/web3.py/pull/944#issuecomment-403957468
|
||||
|
||||
##### get_block_range_for_date.py
|
||||
@@ -394,7 +394,7 @@ CREATE DATABASE ethereumetl;
|
||||
- contracts: [schemas/aws/contracts.sql](schemas/aws/contracts.sql)
|
||||
- receipts: [schemas/aws/receipts.sql](schemas/aws/receipts.sql)
|
||||
- logs: [schemas/aws/logs.sql](schemas/aws/logs.sql)
|
||||
- erc20_tokens: [schemas/aws/erc20_tokens.sql](schemas/aws/erc20_tokens.sql)
|
||||
- tokens: [schemas/aws/tokens.sql](schemas/aws/tokens.sql)
|
||||
|
||||
### Tables for Parquet Files
|
||||
|
||||
@@ -437,7 +437,7 @@ To upload CSVs to BigQuery:
|
||||
> bq --location=US load --replace --source_format=CSV --skip_leading_rows=1 ethereum.receipts gs://<your_bucket>/ethereumetl/export/receipts/*.csv ./schemas/gcp/receipts.json
|
||||
> bq --location=US load --replace --source_format=NEWLINE_DELIMITED_JSON ethereum.logs gs://<your_bucket>/ethereumetl/export/logs/*.json ./schemas/gcp/logs.json
|
||||
> bq --location=US load --replace --source_format=NEWLINE_DELIMITED_JSON ethereum.contracts gs://<your_bucket>/ethereumetl/export/contracts/*.json ./schemas/gcp/contracts.json
|
||||
> bq --location=US load --replace --source_format=CSV --skip_leading_rows=1 --allow_quoted_newlines ethereum.erc20_tokens_duplicates gs://<your_bucket>/ethereumetl/export/erc20_tokens/*.csv ./schemas/gcp/erc20_tokens.json
|
||||
> bq --location=US load --replace --source_format=CSV --skip_leading_rows=1 --allow_quoted_newlines ethereum.tokens_duplicates gs://<your_bucket>/ethereumetl/export/tokens/*.csv ./schemas/gcp/tokens.json
|
||||
```
|
||||
|
||||
Note that NEWLINE_DELIMITED_JSON is used to support REPEATED mode for the columns with lists.
|
||||
@@ -449,11 +449,11 @@ Join `transactions` and `receipts`:
|
||||
> bq --location=US query --replace --destination_table ethereum.transactions_join_receipts --use_legacy_sql=false "$(cat ./schemas/gcp/transactions_join_receipts.sql | tr '\n' ' ')"
|
||||
```
|
||||
|
||||
Deduplicate `erc20_tokens`:
|
||||
Deduplicate `tokens`:
|
||||
|
||||
```bash
|
||||
> bq mk --table --description "Exported using https://github.com/medvedev1088/ethereum-etl" ethereum.erc20_tokens ./schemas/gcp/erc20_tokens.json
|
||||
> bq --location=US query --replace --destination_table ethereum.erc20_tokens --use_legacy_sql=false "$(cat ./schemas/gcp/erc20_tokens_deduplicate.sql | tr '\n' ' ')"
|
||||
> bq mk --table --description "Exported using https://github.com/medvedev1088/ethereum-etl" ethereum.tokens ./schemas/gcp/tokens.json
|
||||
> bq --location=US query --replace --destination_table ethereum.tokens --use_legacy_sql=false "$(cat ./schemas/gcp/tokens_deduplicate.sql | tr '\n' ' ')"
|
||||
```
|
||||
|
||||
### Public Dataset
|
||||
|
||||
@@ -21,10 +21,10 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class EthErc20Token(object):
|
||||
class EthToken(object):
|
||||
def __init__(self):
|
||||
self.erc20_token_address = None
|
||||
self.erc20_token_symbol = None
|
||||
self.erc20_token_name = None
|
||||
self.erc20_token_decimals = None
|
||||
self.erc20_token_total_supply = None
|
||||
self.address = None
|
||||
self.symbol = None
|
||||
self.name = None
|
||||
self.decimals = None
|
||||
self.total_supply = None
|
||||
@@ -23,10 +23,10 @@
|
||||
|
||||
class EthTokenTransfer(object):
|
||||
def __init__(self):
|
||||
self.erc20_token = None
|
||||
self.erc20_from = None
|
||||
self.erc20_to = None
|
||||
self.erc20_value = None
|
||||
self.erc20_tx_hash = None
|
||||
self.erc20_log_index = None
|
||||
self.erc20_block_number = None
|
||||
self.token_address = None
|
||||
self.from_address = None
|
||||
self.to_address = None
|
||||
self.value = None
|
||||
self.tx_hash = None
|
||||
self.log_index = None
|
||||
self.block_number = None
|
||||
|
||||
@@ -23,18 +23,18 @@
|
||||
|
||||
from ethereumetl.executors.batch_work_executor import BatchWorkExecutor
|
||||
from ethereumetl.jobs.base_job import BaseJob
|
||||
from ethereumetl.mappers.erc20_token_mapper import EthErc20TokenMapper
|
||||
from ethereumetl.service.erc20_token_service import EthErc20TokenService
|
||||
from ethereumetl.mappers.token_mapper import EthTokenMapper
|
||||
from ethereumetl.service.token_service import EthTokenService
|
||||
|
||||
|
||||
class ExportErc20TokensJob(BaseJob):
|
||||
class ExportTokensJob(BaseJob):
|
||||
def __init__(self, web3, item_exporter, token_addresses_iterable, max_workers):
|
||||
self.item_exporter = item_exporter
|
||||
self.token_addresses_iterable = token_addresses_iterable
|
||||
self.batch_work_executor = BatchWorkExecutor(1, max_workers)
|
||||
|
||||
self.erc20_token_service = EthErc20TokenService(web3, clean_user_provided_content)
|
||||
self.erc20_token_mapper = EthErc20TokenMapper()
|
||||
self.token_service = EthTokenService(web3, clean_user_provided_content)
|
||||
self.token_mapper = EthTokenMapper()
|
||||
|
||||
def _start(self):
|
||||
self.item_exporter.open()
|
||||
@@ -47,8 +47,8 @@ class ExportErc20TokensJob(BaseJob):
|
||||
self._export_token(token_address)
|
||||
|
||||
def _export_token(self, token_address):
|
||||
token = self.erc20_token_service.get_token(token_address)
|
||||
token_dict = self.erc20_token_mapper.erc20_token_to_dict(token)
|
||||
token = self.token_service.get_token(token_address)
|
||||
token_dict = self.token_mapper.token_to_dict(token)
|
||||
self.item_exporter.export_item(token_dict)
|
||||
|
||||
def _end(self):
|
||||
@@ -24,13 +24,13 @@
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'erc20_token',
|
||||
'erc20_from',
|
||||
'erc20_to',
|
||||
'erc20_value',
|
||||
'erc20_tx_hash',
|
||||
'erc20_log_index',
|
||||
'erc20_block_number'
|
||||
'token_address',
|
||||
'from_address',
|
||||
'to_address',
|
||||
'value',
|
||||
'tx_hash',
|
||||
'log_index',
|
||||
'block_number'
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -24,20 +24,20 @@
|
||||
from ethereumetl.jobs.exporters.composite_item_exporter import CompositeItemExporter
|
||||
|
||||
FIELDS_TO_EXPORT = [
|
||||
'erc20_token_address',
|
||||
'erc20_token_symbol',
|
||||
'erc20_token_name',
|
||||
'erc20_token_decimals',
|
||||
'erc20_token_total_supply'
|
||||
'address',
|
||||
'symbol',
|
||||
'name',
|
||||
'decimals',
|
||||
'total_supply'
|
||||
]
|
||||
|
||||
|
||||
def erc20_tokens_item_exporter(erc20_tokens_output):
|
||||
def tokens_item_exporter(tokens_output):
|
||||
return CompositeItemExporter(
|
||||
filename_mapping={
|
||||
'erc20_token': erc20_tokens_output
|
||||
'token': tokens_output
|
||||
},
|
||||
field_mapping={
|
||||
'erc20_token': FIELDS_TO_EXPORT
|
||||
'token': FIELDS_TO_EXPORT
|
||||
}
|
||||
)
|
||||
@@ -21,13 +21,13 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
class EthErc20TokenMapper(object):
|
||||
def erc20_token_to_dict(self, erc20_token):
|
||||
class EthTokenMapper(object):
|
||||
def token_to_dict(self, token):
|
||||
return {
|
||||
'type': 'erc20_token',
|
||||
'erc20_token_address': erc20_token.erc20_token_address,
|
||||
'erc20_token_symbol': erc20_token.erc20_token_symbol,
|
||||
'erc20_token_name': erc20_token.erc20_token_name,
|
||||
'erc20_token_decimals': erc20_token.erc20_token_decimals,
|
||||
'erc20_token_total_supply': erc20_token.erc20_token_total_supply
|
||||
'type': 'token',
|
||||
'address': token.address,
|
||||
'symbol': token.symbol,
|
||||
'name': token.name,
|
||||
'decimals': token.decimals,
|
||||
'total_supply': token.total_supply
|
||||
}
|
||||
@@ -25,11 +25,11 @@ class EthTokenTransferMapper(object):
|
||||
def token_transfer_to_dict(self, token_transfer):
|
||||
return {
|
||||
'type': 'token_transfer',
|
||||
'erc20_token': token_transfer.erc20_token,
|
||||
'erc20_from': token_transfer.erc20_from,
|
||||
'erc20_to': token_transfer.erc20_to,
|
||||
'erc20_value': token_transfer.erc20_value,
|
||||
'erc20_tx_hash': token_transfer.erc20_tx_hash,
|
||||
'erc20_log_index': token_transfer.erc20_log_index,
|
||||
'erc20_block_number': token_transfer.erc20_block_number,
|
||||
'token_address': token_transfer.token_address,
|
||||
'from_address': token_transfer.from_address,
|
||||
'to_address': token_transfer.to_address,
|
||||
'value': token_transfer.value,
|
||||
'tx_hash': token_transfer.tx_hash,
|
||||
'log_index': token_transfer.log_index,
|
||||
'block_number': token_transfer.block_number,
|
||||
}
|
||||
|
||||
@@ -23,11 +23,11 @@
|
||||
|
||||
from web3.exceptions import BadFunctionCallOutput
|
||||
|
||||
from ethereumetl.domain.erc20_token import EthErc20Token
|
||||
from ethereumetl.domain.token import EthToken
|
||||
from ethereumetl.erc20_abi import ERC20_ABI
|
||||
|
||||
|
||||
class EthErc20TokenService(object):
|
||||
class EthTokenService(object):
|
||||
def __init__(self, web3, function_call_result_transformer=None):
|
||||
self._web3 = web3
|
||||
self._function_call_result_transformer = function_call_result_transformer
|
||||
@@ -41,12 +41,12 @@ class EthErc20TokenService(object):
|
||||
decimals = self._call_contract_function(contract.functions.decimals())
|
||||
total_supply = self._call_contract_function(contract.functions.totalSupply())
|
||||
|
||||
token = EthErc20Token()
|
||||
token.erc20_token_address = token_address
|
||||
token.erc20_token_symbol = symbol
|
||||
token.erc20_token_name = name
|
||||
token.erc20_token_decimals = decimals
|
||||
token.erc20_token_total_supply = total_supply
|
||||
token = EthToken()
|
||||
token.address = token_address
|
||||
token.symbol = symbol
|
||||
token.name = name
|
||||
token.decimals = decimals
|
||||
token.total_supply = total_supply
|
||||
|
||||
return token
|
||||
|
||||
@@ -51,13 +51,13 @@ class EthTokenTransferExtractor(object):
|
||||
return None
|
||||
|
||||
token_transfer = EthTokenTransfer()
|
||||
token_transfer.erc20_token = to_normalized_address(receipt_log.address)
|
||||
token_transfer.erc20_from = word_to_address(topics_with_data[1])
|
||||
token_transfer.erc20_to = word_to_address(topics_with_data[2])
|
||||
token_transfer.erc20_value = hex_to_dec(topics_with_data[3])
|
||||
token_transfer.erc20_tx_hash = receipt_log.transaction_hash
|
||||
token_transfer.erc20_log_index = receipt_log.log_index
|
||||
token_transfer.erc20_block_number = receipt_log.block_number
|
||||
token_transfer.token_address = to_normalized_address(receipt_log.address)
|
||||
token_transfer.from_address = word_to_address(topics_with_data[1])
|
||||
token_transfer.to_address = word_to_address(topics_with_data[2])
|
||||
token_transfer.value = hex_to_dec(topics_with_data[3])
|
||||
token_transfer.tx_hash = receipt_log.transaction_hash
|
||||
token_transfer.log_index = receipt_log.log_index
|
||||
token_transfer.block_number = receipt_log.block_number
|
||||
return token_transfer
|
||||
|
||||
return None
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
export_blocks_batch_size=100
|
||||
export_erc20_batch_size=100
|
||||
output_dir=.
|
||||
|
||||
current_time() { echo `date '+%Y-%m-%d %H:%M:%S'`; }
|
||||
@@ -93,7 +91,7 @@ for (( batch_start_block=$start_block; batch_start_block <= $end_block; batch_st
|
||||
transactions_file=${transactions_output_dir}/transactions_${file_name_suffix}.csv
|
||||
log "Exporting blocks ${block_range} to ${blocks_file}"
|
||||
log "Exporting transactions from blocks ${block_range} to ${transactions_file}"
|
||||
python3 export_blocks_and_transactions.py --start-block=${batch_start_block} --end-block=${batch_end_block} --provider-uri="${provider_uri}" --batch-size=${export_blocks_batch_size} --blocks-output=${blocks_file} --transactions-output=${transactions_file}
|
||||
python3 export_blocks_and_transactions.py --start-block=${batch_start_block} --end-block=${batch_end_block} --provider-uri="${provider_uri}" --blocks-output=${blocks_file} --transactions-output=${transactions_file}
|
||||
quit_if_returned_error
|
||||
|
||||
### token_transfers
|
||||
@@ -103,7 +101,7 @@ for (( batch_start_block=$start_block; batch_start_block <= $end_block; batch_st
|
||||
|
||||
token_transfers_file=${token_transfers_output_dir}/token_transfers_${file_name_suffix}.csv
|
||||
log "Exporting ERC20 transfers from blocks ${block_range} to ${token_transfers_file}"
|
||||
python3 export_token_transfers.py --start-block=${batch_start_block} --end-block=${batch_end_block} --provider-uri="${provider_uri}" --batch-size=${export_erc20_batch_size} --output=${token_transfers_file}
|
||||
python3 export_token_transfers.py --start-block=${batch_start_block} --end-block=${batch_end_block} --provider-uri="${provider_uri}" --output=${token_transfers_file}
|
||||
quit_if_returned_error
|
||||
|
||||
### receipts_and_logs
|
||||
@@ -146,22 +144,22 @@ for (( batch_start_block=$start_block; batch_start_block <= $end_block; batch_st
|
||||
python3 export_contracts.py --contract-addresses ${contract_addresses_file} --provider-uri="${provider_uri}" --output=${contracts_file}
|
||||
quit_if_returned_error
|
||||
|
||||
### erc20_tokens
|
||||
### tokens
|
||||
|
||||
erc20_token_addresses_output_dir=${output_dir}/erc20_token_addresses${partition_dir}
|
||||
mkdir -p ${erc20_token_addresses_output_dir}
|
||||
token_addresses_output_dir=${output_dir}/token_addresses${partition_dir}
|
||||
mkdir -p ${token_addresses_output_dir}
|
||||
|
||||
erc20_token_addresses_file=${erc20_token_addresses_output_dir}/erc20_token_addresses_${file_name_suffix}
|
||||
log "Extracting erc20_token_address from erc20_token_transfers file ${token_transfers_file}"
|
||||
python3 extract_csv_column.py -i ${token_transfers_file} -c erc20_token -o - | sort | uniq > ${erc20_token_addresses_file}
|
||||
token_addresses_file=${token_addresses_output_dir}/token_addresses_${file_name_suffix}
|
||||
log "Extracting token_address from token_transfers file ${token_transfers_file}"
|
||||
python3 extract_csv_column.py -i ${token_transfers_file} -c token_address -o - | sort | uniq > ${token_addresses_file}
|
||||
quit_if_returned_error
|
||||
|
||||
erc20_tokens_output_dir=${output_dir}/erc20_tokens${partition_dir}
|
||||
mkdir -p ${erc20_tokens_output_dir}
|
||||
tokens_output_dir=${output_dir}/tokens${partition_dir}
|
||||
mkdir -p ${tokens_output_dir}
|
||||
|
||||
erc20_tokens_file=${erc20_tokens_output_dir}/erc20_tokens_${file_name_suffix}.csv
|
||||
log "Exporting erc20_tokens from blocks ${block_range} to ${erc20_tokens_file}"
|
||||
python3 export_erc20_tokens.py --token-addresses ${erc20_token_addresses_file} --provider-uri="${provider_uri}" --output ${erc20_tokens_file}
|
||||
tokens_file=${tokens_output_dir}/tokens_${file_name_suffix}.csv
|
||||
log "Exporting tokens from blocks ${block_range} to ${tokens_file}"
|
||||
python3 export_tokens.py --token-addresses ${token_addresses_file} --provider-uri="${provider_uri}" --output ${tokens_file}
|
||||
quit_if_returned_error
|
||||
|
||||
end_time=$(date +%s)
|
||||
|
||||
@@ -26,8 +26,8 @@ import argparse
|
||||
from web3 import Web3
|
||||
|
||||
from ethereumetl.file_utils import smart_open
|
||||
from ethereumetl.jobs.export_erc20_tokens_job import ExportErc20TokensJob
|
||||
from ethereumetl.jobs.exporters.erc20_tokens_item_exporter import erc20_tokens_item_exporter
|
||||
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.logging_utils import logging_basic_config
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from ethereumetl.providers.auto import get_provider_from_uri
|
||||
@@ -45,10 +45,10 @@ parser.add_argument('-p', '--provider-uri', default='https://mainnet.infura.io/'
|
||||
args = parser.parse_args()
|
||||
|
||||
with smart_open(args.token_addresses, 'r') as token_addresses_file:
|
||||
job = ExportErc20TokensJob(
|
||||
job = ExportTokensJob(
|
||||
token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
|
||||
web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(args.provider_uri))),
|
||||
item_exporter=erc20_tokens_item_exporter(args.output),
|
||||
item_exporter=tokens_item_exporter(args.output),
|
||||
max_workers=args.max_workers)
|
||||
|
||||
job.run()
|
||||
@@ -1,21 +0,0 @@
|
||||
CREATE EXTERNAL TABLE IF NOT EXISTS erc20_tokens (
|
||||
erc20_token_address STRING,
|
||||
erc20_token_symbol STRING,
|
||||
erc20_token_name STRING,
|
||||
erc20_token_decimals BIGINT,
|
||||
erc20_token_total_supply DECIMAL(38,0)
|
||||
)
|
||||
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
|
||||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
|
||||
WITH SERDEPROPERTIES (
|
||||
'serialization.format' = ',',
|
||||
'field.delim' = ',',
|
||||
'escape.delim' = '\\'
|
||||
)
|
||||
STORED AS TEXTFILE
|
||||
LOCATION 's3://<your_bucket>/ethereumetl/export/erc20_tokens'
|
||||
TBLPROPERTIES (
|
||||
'skip.header.line.count' = '1'
|
||||
);
|
||||
|
||||
MSCK REPAIR TABLE erc20_tokens;
|
||||
@@ -1,11 +1,11 @@
|
||||
CREATE EXTERNAL TABLE IF NOT EXISTS parquet_token_transfers (
|
||||
erc20_token STRING,
|
||||
erc20_from STRING,
|
||||
erc20_to STRING,
|
||||
erc20_value DECIMAL(38,0),
|
||||
erc20_tx_hash STRING,
|
||||
erc20_log_index BIGINT,
|
||||
erc20_block_number BIGINT
|
||||
token_address STRING,
|
||||
from_address STRING,
|
||||
to_address STRING,
|
||||
value DECIMAL(38,0),
|
||||
tx_hash STRING,
|
||||
log_index BIGINT,
|
||||
block_number BIGINT
|
||||
)
|
||||
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
|
||||
STORED AS PARQUET
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
CREATE EXTERNAL TABLE IF NOT EXISTS token_transfers (
|
||||
erc20_token STRING,
|
||||
erc20_from STRING,
|
||||
erc20_to STRING,
|
||||
erc20_value DECIMAL(38,0),
|
||||
erc20_tx_hash STRING,
|
||||
erc20_log_index BIGINT,
|
||||
erc20_block_number BIGINT
|
||||
token_address STRING,
|
||||
from_address STRING,
|
||||
to_address STRING,
|
||||
value DECIMAL(38,0),
|
||||
tx_hash STRING,
|
||||
log_index BIGINT,
|
||||
block_number BIGINT
|
||||
)
|
||||
PARTITIONED BY (start_block BIGINT, end_block BIGINT)
|
||||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
|
||||
|
||||
21
schemas/aws/tokens.sql
Normal file
21
schemas/aws/tokens.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- External table over the exported tokens CSV files.
-- Rows are read as comma-delimited text from the S3 prefix given in LOCATION,
-- and the first line of every file is skipped as a header row.
-- Replace <your_bucket> with the bucket that holds the export.
-- NOTE(review): LazySimpleSerDe splits on the delimiter only; presumably token
-- names/symbols containing commas are escaped by the exporter -- confirm.
CREATE EXTERNAL TABLE IF NOT EXISTS tokens (
    address      STRING,
    symbol       STRING,
    name         STRING,
    decimals     BIGINT,
    total_supply DECIMAL(38,0)
)
PARTITIONED BY (
    start_block BIGINT,
    end_block   BIGINT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'serialization.format' = ',',
    'field.delim' = ',',
    'escape.delim' = '\\'
)
STORED AS TEXTFILE
LOCATION 's3://<your_bucket>/ethereumetl/export/tokens'
TBLPROPERTIES (
    'skip.header.line.count' = '1'
);

-- Pick up the partition directories that already exist under the table location.
MSCK REPAIR TABLE tokens;
|
||||
@@ -1,18 +0,0 @@
|
||||
WITH erc20_tokens_grouped AS (
|
||||
SELECT
|
||||
erc20_token_address,
|
||||
erc20_token_symbol,
|
||||
erc20_token_name,
|
||||
erc20_token_decimals,
|
||||
erc20_token_total_supply,
|
||||
ROW_NUMBER() OVER (PARTITION BY erc20_token_address) AS rank
|
||||
FROM
|
||||
`ethereum.erc20_tokens_duplicates`)
|
||||
SELECT
|
||||
erc20_token_address,
|
||||
erc20_token_symbol,
|
||||
erc20_token_name,
|
||||
erc20_token_decimals,
|
||||
erc20_token_total_supply
|
||||
FROM erc20_tokens_grouped
|
||||
WHERE erc20_tokens_grouped.rank = 1
|
||||
@@ -1,38 +1,38 @@
|
||||
[
|
||||
{
|
||||
"name": "erc20_token",
|
||||
"name": "token_address",
|
||||
"type": "STRING",
|
||||
"description": "ERC20 token address"
|
||||
},
|
||||
{
|
||||
"name": "erc20_from",
|
||||
"name": "from_address",
|
||||
"type": "STRING",
|
||||
"description": "Address of the sender"
|
||||
},
|
||||
{
|
||||
"name": "erc20_to",
|
||||
"name": "to_address",
|
||||
"type": "STRING",
|
||||
"description": "Address of the receiver"
|
||||
},
|
||||
{
|
||||
"name": "erc20_value",
|
||||
"name": "value",
|
||||
"type": "STRING",
|
||||
"description": "Value transferred (ERC20) / id of the token transferred (ERC721)"
|
||||
},
|
||||
{
|
||||
"name": "erc20_tx_hash",
|
||||
"name": "tx_hash",
|
||||
"type": "STRING",
|
||||
"mode": "REQUIRED",
|
||||
"description": "Transaction hash"
|
||||
},
|
||||
{
|
||||
"name": "erc20_log_index",
|
||||
"name": "log_index",
|
||||
"type": "INT64",
|
||||
"mode": "REQUIRED",
|
||||
"description": "Log index in the transaction receipt"
|
||||
},
|
||||
{
|
||||
"name": "erc20_block_number",
|
||||
"name": "block_number",
|
||||
"type": "INT64",
|
||||
"description": "The block number"
|
||||
}
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
[
|
||||
{
|
||||
"name": "erc20_token_address",
|
||||
"name": "address",
|
||||
"type": "STRING",
|
||||
"mode": "REQUIRED",
|
||||
"description": "The address of the ERC20 token"
|
||||
},
|
||||
{
|
||||
"name": "erc20_token_symbol",
|
||||
"name": "symbol",
|
||||
"type": "STRING",
|
||||
"description": "The symbol of the ERC20 token"
|
||||
},
|
||||
{
|
||||
"name": "erc20_token_name",
|
||||
"name": "name",
|
||||
"type": "STRING",
|
||||
"description": "The name of the ERC20 token"
|
||||
},
|
||||
{
|
||||
"name": "erc20_token_decimals",
|
||||
"name": "decimals",
|
||||
"type": "INT64",
|
||||
"description": "The number of decimals the token uses - e.g. 8, means to divide the token amount by 100000000 to get its user representation"
|
||||
},
|
||||
{
|
||||
"name": "erc20_token_total_supply",
|
||||
"name": "total_supply",
|
||||
"type": "NUMERIC",
|
||||
"description": "The total token supply"
|
||||
}
|
||||
18
schemas/gcp/tokens_deduplicate.sql
Normal file
18
schemas/gcp/tokens_deduplicate.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
 * Reduce ethereum.tokens_duplicates to one row per token address.
 * Rows are numbered within each address and only row number 1 is kept.
 * The window has no ORDER BY, so which duplicate survives is unspecified.
 * Block comments only: the documented invocation replaces newlines with
 * spaces before passing this file to bq, so a line comment would swallow
 * the rest of the statement.
 */
WITH numbered_tokens AS (
    SELECT
        address,
        symbol,
        name,
        decimals,
        total_supply,
        ROW_NUMBER() OVER (PARTITION BY address) AS row_num
    FROM `ethereum.tokens_duplicates`
)
SELECT
    address,
    symbol,
    name,
    decimals,
    total_supply
FROM numbered_tokens
WHERE row_num = 1
|
||||
@@ -27,12 +27,12 @@ import pytest
|
||||
from web3 import Web3, IPCProvider
|
||||
|
||||
import tests.resources
|
||||
from ethereumetl.jobs.export_erc20_tokens_job import ExportErc20TokensJob
|
||||
from ethereumetl.jobs.exporters.erc20_tokens_item_exporter import erc20_tokens_item_exporter
|
||||
from ethereumetl.jobs.export_tokens_job import ExportTokensJob
|
||||
from ethereumetl.jobs.exporters.tokens_item_exporter import tokens_item_exporter
|
||||
from ethereumetl.thread_local_proxy import ThreadLocalProxy
|
||||
from tests.helpers import compare_lines_ignore_order, read_file
|
||||
|
||||
RESOURCE_GROUP = 'test_export_erc20_tokens_job'
|
||||
RESOURCE_GROUP = 'test_export_tokens_job'
|
||||
|
||||
|
||||
def read_resource(resource_group, file_name):
|
||||
@@ -57,17 +57,17 @@ class MockWeb3Provider(IPCProvider):
|
||||
@pytest.mark.parametrize("token_addresses,resource_group", [
|
||||
(['0xf763be8b3263c268e9789abfb3934564a7b80054'], 'token_with_invalid_data')
|
||||
])
|
||||
def test_export_erc20_tokens_job(tmpdir, token_addresses, resource_group):
|
||||
output_file = tmpdir.join('erc20_tokens.csv')
|
||||
def test_export_tokens_job(tmpdir, token_addresses, resource_group):
|
||||
output_file = tmpdir.join('tokens.csv')
|
||||
|
||||
job = ExportErc20TokensJob(
|
||||
job = ExportTokensJob(
|
||||
token_addresses_iterable=token_addresses,
|
||||
web3=ThreadLocalProxy(lambda: Web3(MockWeb3Provider(resource_group))),
|
||||
item_exporter=erc20_tokens_item_exporter(output_file),
|
||||
item_exporter=tokens_item_exporter(output_file),
|
||||
max_workers=5
|
||||
)
|
||||
job.run()
|
||||
|
||||
compare_lines_ignore_order(
|
||||
read_resource(resource_group, 'expected_erc20_tokens.csv'), read_file(output_file)
|
||||
read_resource(resource_group, 'expected_tokens.csv'), read_file(output_file)
|
||||
)
|
||||
@@ -40,9 +40,9 @@ def test_extract_transfer_from_receipt_log():
|
||||
|
||||
token_transfer = token_transfer_extractor.extract_transfer_from_log(log)
|
||||
|
||||
assert token_transfer.erc20_token == '0x25c6413359059694a7fca8e599ae39ce1c944da2'
|
||||
assert token_transfer.erc20_from == '0xe9eeaec75883f0e389a78e2260bfac1776df2f1d'
|
||||
assert token_transfer.erc20_to == '0x0000000000000000000000000000000000000000'
|
||||
assert token_transfer.erc20_value == 115792089237316195423570985008687907853269984665640564039457584007913129638936
|
||||
assert token_transfer.erc20_tx_hash == '0xd62a74c7b04e8e0539398f6ba6a5eb11ad8aa862e77f0af718f0fad19b0b0480'
|
||||
assert token_transfer.erc20_block_number == 1061946
|
||||
assert token_transfer.token_address == '0x25c6413359059694a7fca8e599ae39ce1c944da2'
|
||||
assert token_transfer.from_address == '0xe9eeaec75883f0e389a78e2260bfac1776df2f1d'
|
||||
assert token_transfer.to_address == '0x0000000000000000000000000000000000000000'
|
||||
assert token_transfer.value == 115792089237316195423570985008687907853269984665640564039457584007913129638936
|
||||
assert token_transfer.tx_hash == '0xd62a74c7b04e8e0539398f6ba6a5eb11ad8aa862e77f0af718f0fad19b0b0480'
|
||||
assert token_transfer.block_number == 1061946
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
erc20_token_address,erc20_token_symbol,erc20_token_name,erc20_token_decimals,erc20_token_total_supply
|
||||
0xf763be8b3263c268e9789abfb3934564a7b80054,ETH,ETH,18,6547475210000000000
|
||||
|
@@ -1,3 +1,3 @@
|
||||
erc20_token,erc20_from,erc20_to,erc20_value,erc20_tx_hash,erc20_log_index,erc20_block_number
|
||||
token_address,from_address,to_address,value,tx_hash,log_index,block_number
|
||||
0xf4eced2f682ce333f96f2d8966c613ded8fc95dd,0x1b63142628311395ceafeea5667e7c9026c862ca,0xac4df82fe37ea2187bc8c011a23d743b4f39019a,100000,0x04cbcb236043d8fb7839e07bbc7f5eed692fb2ca55d897f1101eac3e3ad4fab8,0,483920
|
||||
0xf4eced2f682ce333f96f2d8966c613ded8fc95dd,0x9b22a80d5c7b3374a05b446081f97d0a34079e7f,0x66f183060253cfbe45beff1e6e7ebbe318c81e56,200000,0xcea6f89720cc1d2f46cc7a935463ae0b99dd5fad9c91bb7357de5421511cee49,1,483920
|
||||
|
||||
|
@@ -0,0 +1,2 @@
|
||||
address,symbol,name,decimals,total_supply
|
||||
0xf763be8b3263c268e9789abfb3934564a7b80054,ETH,ETH,18,6547475210000000000
|
||||
|
@@ -1,3 +1,3 @@
|
||||
erc20_token,erc20_from,erc20_to,erc20_value,erc20_tx_hash,erc20_log_index,erc20_block_number
|
||||
token_address,from_address,to_address,value,tx_hash,log_index,block_number
|
||||
0xc66ea802717bfb9833400264dd12c2bceaa34a6d,0x9f73bc871764c879fd9e0f524278373fa7875068,0xf51bc4633f5924465c8c6317169faf3e4312e82f,109000000000000000000,0x5cb4fc2e3d217f3c286358d6bc042259c8befb0dabe450567a987f5770043157,3,1452581
|
||||
0xe0b7927c4af23765cb51314a0e0521a9645f0e2a,0x8d7b6fb1523f04e644085e14d5e49b1c6278c92e,0x4b0df684f9c9789d0e30475d654eec2fc1634a1f,40000000000,0xcded4ed21d5825c063833ed599814a0f687e3e411657a5ee1170ac625df607b7,5,1452581
|
||||
|
Reference in New Issue
Block a user