Code enhancements, etc.

This commit is contained in:
ashpect
2024-07-18 03:48:23 +05:30
parent 140fd4b1b6
commit d5f571ca50
10 changed files with 997 additions and 303 deletions

View File

@@ -43,11 +43,8 @@ template ProvePassportNotInOfac(nLevels) {
poseidon_hash.inputs[2] <== 1;
signal computedRoot <== BinaryMerkleRoot(256)(poseidon_hash.out, smt_size, smt_path, smt_siblings);
component iseq = IsEqual();
computedRoot ==> iseq.in[0];
smt_root ==> iseq.in[1];
out1 <== iseq.out;
out1 <== IsEqual()([computedRoot,smt_root]);
// if out == false ; then proof failed as path and siblings do not compute to root
// now if out == 1; path and siblings are true but the leaf_value given might be closest or might be actual
// check if leaf_value = posiedon_hasher.out
@@ -58,14 +55,11 @@ template ProvePassportNotInOfac(nLevels) {
// signal output commonLength;
// true if leaf given = leaf calulated
component iseq2 = IsEqual();
leaf_value ==> iseq2.in[0];
poseidon_hasher.out ==> iseq2.in[1];
out2 <== iseq2.out;
proofType <== iseq2.out;
out2 <== IsEqual()([leaf_value,poseidon_hasher.out]);
proofType <== out2;
component lt = LessEqThan(9);
smt_size ==> lt.in[0];
lt.in[0] <== smt_size;
lt.in[1] <== PoseidonHashesCommonLength()(leaf_value,poseidon_hasher.out);
out3 <== lt.out; // true if depth <= matchingbits.length

View File

@@ -2,7 +2,7 @@ import { expect } from 'chai'
import path from "path";
const wasm_tester = require("circom_tester").wasm;
import { mockPassportData_sha256WithRSAEncryption_65537 } from '../../common/src/utils/mockPassportData';
import { passport_smt } from '../../common/src/utils/passportTree';
import { ofac_smt } from '../../common/src/utils/ofacTree';
import { generateCircuitInputsOfac, generateCircuitInputsDisclose } from '../../common/src/utils/generateInputs';
import { getLeaf } from '../../common/src/utils/pubkeyTree';
import { SMT } from "@zk-kit/smt"
@@ -65,7 +65,7 @@ describe("start testing disclose.circom", function () {
tree.insert(BigInt(commitment));
// smt stuff
let smttrees = passport_smt();
let smttrees = ofac_smt();
passportTree = smttrees[0];
nameDobTree = smttrees[1];
nameTree = smttrees[2];

28
common/ofacdata/ReadMe.md Normal file
View File

@@ -0,0 +1,28 @@
# How We Process OFAC Lists
## Data Collection :
- We collect the data from the official website of the U.S. Department of the Treasury's Office of Foreign Assets Control (OFAC) and download the data in the form of a CSV file from [here](https://sanctionslist.ofac.treas.gov/Home/SdnList)
- The SDN list contains the names of individuals, entities and groups designated by OFAC as well as the listing of maritime vessels and aircraft that are blocked by OFAC.
### ofacdata/original
- The data is stored in the form of 2 CSV files named `sdn.csv` and `add.csv`. `dataspec.txt` explains the data specification for the CSV data files.
- The data is cleaned to obtain the required information for individuals from the `sdn.csv` file.
Roughly 6917 entries for individuals (at the time of writing this document) are present in `sdn.csv`. The remaining entries are entities, vessels, and aircraft.
## Data Processing :
### ofacdata/scripts
- The `ofac.ipynb` notebook extracts the data from both CSV files and parses it into JSON format.
- We parse all Ethereum addresses, regardless of whether they belong to an individual or an entity, into `eth_addresses.json`.
- For individuals, we parse :
- full name (first name, last name) and DOB (day, month, year) in `names.json`
- passport numbers and passport issuing country in `passport.json`
## Data Usage :
These JSON files are later used to build sparse Merkle trees for non-membership proofs. We provide 3 levels of proofs.
- Match through Passport Number : level 3 (Absolute Match)
- Match through Name and DOB combo tree : level 2 (High Probability Match)
- Match only through Name : level 1 (Partial Match)
Check out src/ofacTree.ts for more details.<br>

View File

@@ -0,0 +1,149 @@
[
{
"Eth_address": "0x098B716B8Aaf21512996dC57EB0615e2383E2f96"
},
{
"Eth_address": "0xa0e1c89Ef1a489c9C7dE96311eD5Ce5D32c20E4B"
},
{
"Eth_address": "0x3Cffd56B47B7b41c56258D9C7731ABaDc360E073"
},
{
"Eth_address": "0x53b6936513e738f44FB50d2b9476730C0Ab3Bfc1"
},
{
"Eth_address": "0x35fB6f6DB4fb05e6A4cE86f2C93691425626d4b1"
},
{
"Eth_address": "0xF7B31119c2682c88d88D455dBb9d5932c65Cf1bE"
},
{
"Eth_address": "0x3e37627dEAA754090fBFbb8bd226c1CE66D255e9"
},
{
"Eth_address": "0x08723392Ed15743cc38513C4925f5e6be5c17243"
},
{
"Eth_address": "0x7F367cC41522cE07553e823bf3be79A889DEbe1B"
},
{
"Eth_address": "0xd882cfc20f52f2599d84b8e8d58c7fb62cfe344b"
},
{
"Eth_address": "0x901bb9583b24d97e995513c6778dc6888ab6870e"
},
{
"Eth_address": "0xa7e5d5a720f06526557c513402f2e6b5fa20b008"
},
{
"Eth_address": "0x9f4cda013e354b8fc285bf4b9a60460cee7f7ea9"
},
{
"Eth_address": "0x3cbded43efdaf0fc77b9c55f6fc9988fcc9b757d"
},
{
"Eth_address": "0x7FF9cFad3877F21d41Da833E2F775dB0569eE3D9"
},
{
"Eth_address": "0xc2a3829F459B3Edd87791c74cD45402BA0a20Be3"
},
{
"Eth_address": "0x3AD9dB589d201A710Ed237c829c7860Ba86510Fc"
},
{
"Eth_address": "0x12D66f87A04A9E220743712cE6d9bB1B5616B8Fc"
},
{
"Eth_address": "0x47CE0C6eD5B0Ce3d3A51fdb1C52DC66a7c3c2936"
},
{
"Eth_address": "0x910Cbd523D972eb0a6f4cAe4618aD62622b39DbF"
},
{
"Eth_address": "0xA160cdAB225685dA1d56aa342Ad8841c3b53f291"
},
{
"Eth_address": "0xD4B88Df4D29F5CedD6857912842cff3b20C8Cfa3"
},
{
"Eth_address": "0xFD8610d20aA15b7B2E3Be39B396a1bC3516c7144"
},
{
"Eth_address": "0x07687e702b410Fa43f4cB4Af7FA097918ffD2730"
},
{
"Eth_address": "0x23773E65ed146A459791799d01336DB287f25334"
},
{
"Eth_address": "0x22aaA7720ddd5388A3c0A3333430953C68f1849b"
},
{
"Eth_address": "0x03893a7c7463AE47D46bc7f091665f1893656003"
},
{
"Eth_address": "0x2717c5e28cf931547B621a5dddb772Ab6A35B701"
},
{
"Eth_address": "0xD21be7248e0197Ee08E0c20D4a96DEBdaC3D20Af"
},
{
"Eth_address": "0x39D908dac893CBCB53Cc86e0ECc369aA4DeF1A29"
},
{
"Eth_address": "0x4f47bc496083c727c5fbe3ce9cdf2b0f6496270c"
},
{
"Eth_address": "0x97b1043abd9e6fc31681635166d430a458d14f9c"
},
{
"Eth_address": "0xb6f5ec1a0a9cd1526536d3f0426c429529471f40"
},
{
"Eth_address": "0x9c2bc757b66f24d60f016b6237f8cdd414a879fa"
},
{
"Eth_address": "0xdcbEfFBECcE100cCE9E4b153C4e15cB885643193"
},
{
"Eth_address": "0x5f48c2a71b2cc96e3f0ccae4e39318ff0dc375b2"
},
{
"Eth_address": "0x5a7a51bfb49f190e5a6060a5bc6052ac14a3b59f"
},
{
"Eth_address": "0xed6e0a7e4ac94d976eebfb82ccf777a3c6bad921"
},
{
"Eth_address": "0x797d7ae72ebddcdea2a346c1834e04d1f8df102b"
},
{
"Eth_address": "0x931546D9e66836AbF687d2bc64B30407bAc8C568"
},
{
"Eth_address": "0x43fa21d92141BA9db43052492E0DeEE5aa5f0A93"
},
{
"Eth_address": "0x6be0ae71e6c41f2f9d0d1a3b8d0f75e6f6a0b46e"
},
{
"Eth_address": "0x530a64c0ce595026a4a556b703644228179e2d57"
},
{
"Eth_address": "0x983a81ca6FB1e441266D2FbcB7D8E530AC2E05A2"
},
{
"Eth_address": "0x961c5be54a2ffc17cf4cb021d863c42dacd47fc1"
},
{
"Eth_address": "0xE950DC316b836e4EeFb8308bf32Bf7C72a1358FF"
},
{
"Eth_address": "0x21B8d56BDA776bbE68655A16895afd96F5534feD"
},
{
"Eth_address": "0xf3701f445b6bdafedbca97d1e477357839e4120d"
},
{
"Eth_address": "0x19F8f2B0915Daa12a3f5C9CF01dF9E24D53794F7"
}
]

View File

@@ -1,35 +0,0 @@
[
{
"Eth_address": "0x7f367cc41522ce07553e823bf3be79a889debe1b"
},
{
"Eth_address": "0xd882cfc20f52f2599d84b8e8d58c7fb62cfe344b"
},
{
"Eth_address": "0x901bb9583b24d97e995513c6778dc6888ab6870e"
},
{
"Eth_address": "0x39d908dac893cbcb53cc86e0ecc369aa4def1a29"
},
{
"Eth_address": "0x4f47bc496083c727c5fbe3ce9cdf2b0f6496270c"
},
{
"Eth_address": "0x97b1043abd9e6fc31681635166d430a458d14f9c"
},
{
"Eth_address": "0x9c2bc757b66f24d60f016b6237f8cdd414a879fa"
},
{
"Eth_address": "0xdcbeffbecce100cce9e4b153c4e15cb885643193"
},
{
"Eth_address": "0x530a64c0ce595026a4a556b703644228179e2d57"
},
{
"Eth_address": "0x961c5be54a2ffc17cf4cb021d863c42dacd47fc1"
},
{
"Eth_address": "0xf3701f445b6bdafedbca97d1e477357839e4120d"
}
]

View File

@@ -0,0 +1,308 @@
OFFICE OF FOREIGN ASSETS CONTROL
U.S. TREASURY DEPARTMENT
SPECIALLY DESIGNATED NATIONALS AND BLOCKED PERSONS
DATA SPECIFICATION
First Released: 12/06/2004
Updated: 09/25/2023
PLEASE NOTE IMPORTANT INFORMATION REGARDING THE TECHNICAL STRUCTURE
OF THESE FILES ARE FEATURED AT THE BOTTOM OF THE DOCUMENT
OFAC is now publishing its list of Specially Designated Nationals in XML
and Comma delimited (CSV) format. These digital publications as
transmitted by OFAC are designed as reference tools providing actual notice of
actions by OFAC with respect to Specially Designated Nationals and other
entities whose property is blocked, to assist the public in complying with the
various sanctions programs administered by OFAC. The latest changes may appear
here prior to their publication in the Federal Register, and it is intended that
users rely on changes indicated in these documents that post-date the most
recent Federal Register publication with respect to a particular sanctions
program in the appendices to chapter V of Title 31, Code of Federal Regulations.
Such changes reflect official actions of OFAC, and will be reflected as soon as
practicable in the Federal Register under the index heading "Foreign Assets
Control." New Federal Register notices with regard to Specially Designated
Nationals or blocked entities may be published at any time. Users are advised
to check the Federal Register and these electronic publications routinely for
additional names or other changes to the listings. Entities and individuals on
the list are occasionally licensed by OFAC to transact business with U.S.
persons in anticipation of removal from the list or because of foreign policy
considerations in unique circumstances. Licensing in anticipation of official
Federal Register publication of a notice of removal based on the unblocking of
an entity's or individual's property is reflected in these publications by
removal from the list. Current information on licenses issued with regard to
Specially Designated Nationals and other blocked persons may be obtained or
verified by calling OFAC Licensing at (202) 622-2480.
Technical Specification:
Format *.ff consists of records separated by carriage returns, with fields
within the records beginning at fixed locations.
Format *.csv consistes of records seperated by carriage returns (ASCII character
13), with fields (values) within records delimited by the "," (comma) symbol
(ASCII character 44).
Null values for all four formats consist of "-0-" (ASCII characters 45, 48, 45).
The Comma Seperated Values (.csv), and Fixed-Field (.ff) releases consist of three ASCII text files--a main
file listing the name of the SDN and other information unique to that entity
(sdn.*), a file of addresses (add.*), and a file of alternate names (alt.*).
Addresses and alternate names are linked to particular SDNs using unique integer
values in a linking or primary key column. The integers used are assigned for
linking purposes only and do not represent an official reference to that entity.
Releases of the database-format files are intended as a service to the user
community. OFAC's SDN list is published in the Federal Register. All of OFAC's
lists are drawn from the same underlying data and every effort has been made to
ensure consistency. The Federal Register will govern should differences arise.
Due to the nature, urgency, and sensitivity of the programs which OFAC
administers and enforces, it may not always be possible to provide advanced
notice to users of format changes to the database structure.
The files associated with each release are:
fixed field: SDN.FF, ADD.FF, ALT.FF, SDN_COMMENTS.FF
Comma delimited: SDN.CSV, ADD.CSV, ALT.CSV, SDN COMMENTS.CSV
XML: SDN.XML, SDN_ADVANCED.XML
Misc: dat_spec.txt (this file), sdn.xsd (XML SDN schema),
sdn_advanced.xsd (advanced XML SDN schema).
FORMAT SDN FIXED FIELD
Main table, text file name SDN.FF
Column Posi-
sequence Column name Type Size tion Description
-------- ----------- ------- ---- ---- ---------------------
1 ent_num number 10 10 unique record
identifier/unique
listing identifier
2 SDN_Name text 350 11 name of sdn
3 SDN_Type text 12 361 type of SDN
4 Program text 200 373 sanctions program name
5 Title text 200 573 title of an individual
6 Call_Sign text 8 773 vessel call sign
7 Vess_type text 25 781 vessel type
8 Tonnage text 14 806 vessel tonnage
9 GRT text 8 820 gross registered
tonnage
10 Vess_flag text 40 828 vessel flag
11 Vess_owner text 150 868 vessel owner
12 Remarks text 1000 1018 remarks on SDN*
END OF ROW 2018
Address table, text file name ADD.FF
Column Posi-
sequence Column name Type Size tion Description
-------- ----------- ------- ---- ---- ---------------------
1 Ent_num number 10 1 link to unique listing
2 Add_num number 10 11 unique record
identifier
3 Address text 750 21 street address of SDN
4 City/ text 116 771 city, state/province, zip/postal code
State/Province/
Postal Code
5 Country text 250 887 country of address
6 Add_remarks text 200 1137 remarks on address
END OF ROW 1337
Alternate identity table, text file name ALT.FF
Column Posi-
sequence Column name Type Size tion Description
-------- ----------- ------- ---- ---- ---------------------
1 ent_num number 10 1 link to unique listing
2 alt_num number 10 11 unique record
identifier
3 alt_type text 8 21 type of alternate
identity
(aka, fka, nka)
4 alt_name text 350 29 alternate identity name
5 alt_remarks text 200 379 remarks on alternate
identity
END OF ROW 579
Record separator: carriage return
null: -0-
FORMAT SDN CSV
Main table, text file name SDN.CSV
Column
sequence Column name Type Size Description
-------- ------------ ------- ---- ---------------------
1 ent_num number unique record
identifier/unique
listing identifier
2 SDN_Name text 350 name of SDN
3 SDN_Type text 12 type of SDN
4 Program text 200 sanctions program name
5 Title text 200 title of an individual
6 Call_Sign text 8 vessel call sign
7 Vess_type text 25 vessel type
8 Tonnage text 14 vessel tonnage
9 GRT text 8 gross registered tonnage
10 Vess_flag text 40 vessel flag
11 Vess_owner text 150 vessel owner
12 Remarks text 1000 remarks on SDN*
Address table, text file name ADD.CSV
Column
sequence Column name Type Size Description
-------- ------------ ------- ---- ---------------------
1 Ent_num number link to unique listing
2 Add_num number unique record identifier
3 Address text 750 street address of SDN
4 City/ text 116 city, state/province, zip/postal code
State/Province/
Postal Code
5 Country text 250 country of address
6 Add_remarks text 200 remarks on address
Alternate identity table, text file name ALT.CSV
Column
sequence Column name Type Size Description
-------- ------------ ------- ---- ---------------------
1 ent_num number link to unique listing
2 alt_num number unique record identifier
3 alt_type text 8 type of alternate identity
(aka, fka, nka)
4 alt_name text 350 alternate identity name
5 alt_remarks text 200 remarks on alternate identity
Record separator: carriage return
field (value) delimiter: ,
text value quotes: "
null: -0-
*SPILLOVER FILES:
OFAC has made certain changes to its SDN production system that now allow for
an unlimited number of identifiers, features and linked to identifications to
be added to a record. In the fixed-width and delimited files these data are
stored in the remarks field. Due to these changes, it is now possible for an
SDN record to exceed the 1000 character remarks limitation. Data that exceeds
the specified field limit will be truncated to ensure that the current data
specification is followed. However, in order to ensure that users of these
files continue to have access to truncated data, OFAC has created "spillover files."
These files will follow the same data specification of the files they are
associated with. However, there will be no upper limit on row length in these files.
The spillover file names are:
sdn_comments.csv
sdn_comments.ff
These files will be listed separately on the OFAC website's SDN page. They will also be listed
separately in the library/fac_dlim and /fac_delim folders of OFAC's FTP sites.
Please visit the following tutorial on OFAC's website for more information on
creating a database using these files:
https://ofac.treasury.gov/sdn-list-data-formats-data-schemas/tutorial-on-the-use-of-list-related-legacy-flat-files
THE DISPOSITION OF ALIASES:
OFAC classifies SDN aliases as weak or strong. In the data files
discussed in this document, weak aliases are not stored in the alt.* files.
Weak aliases are stored in the remarks field that trails every primary
SDN record in the SDN.* files. For more information on weak aliases
please review the following text taken from the frequently asked questions
on OFAC's website.
What are weak aliases (AKAs)?
A "weak AKA" is a term for a relatively broad or generic alias that
may generate a large volume of false hits. Weak AKAs include
nicknames, noms-de-guerre, and unusually common acronyms. OFAC
includes these AKAs because, based on information available to it, the
sanctions targets refer to themselves, or are referred to, by these
names. As a result, these AKAs may be useful for identification
purposes, particularly in confirming a possible "hit" or "match"
triggered by other identifier information. Realizing, however, the
large number of false hits that these names may generate, OFAC
qualitatively distinguishes them from other AKAs by designating them
as weak. OFAC has instituted procedures that attempt to make this
qualitative review of aliases as objective as possible. Before
issuing this updated guidance, OFAC conducted a review of all aliases
on the SDN list. Each SDN alias was run through a computer program
that evaluated the potential of an alias to produce false positives in
an automated screening environment. Names were evaluated using the
following criteria:
Character length (shorter strings were assumed to be less effective in
a screening environment than longer strings);
The presence of numbers in an alias (digits 0-9);
The presence of common words that are generally considered to
constitute a nickname (example: Ahmed the Tall);
References in the alias to geographic locations (example: Ahmed the
Sudanese);
The presence of very common prefixes in a name where the prefix was
one of only two strings in a name (example: Mr. Smith).
Aliases that met one or more of the above criteria were flagged for
human review. OFAC subject matter experts then reviewed each of the
automated recommendations and made final decisions on the flagging of
each alias.*
OFAC intends to use these procedures to evaluate all new aliases
introduced to the SDN list.
Where can I find weak aliases (AKAs)?
Weak AKAs appear differently depending on which file format of the SDN
List is utilized.
In the TXT and PDF versions of the SDN List, weak AKAs are
encapsulated in double-quotes within the AKA listing:
ALLANE, Hacene (a.k.a. ABDELHAY, al-Sheikh; a.k.a. AHCENE, Cheib;
a.k.a. "ABU AL-FOUTOUH"; a.k.a. "BOULAHIA"; a.k.a. "HASSAN THE OLD");
DOB 17 Jan 1941; POB El Menea, Algeria (individual) [SDGT]
This convention also is followed in the alphabetical listing published
in Appendix A to Chapter V of Title 31 of the Code of Federal
Regulations.
In the FF, and CSV file formats, weak AKAs are listed in the
Remarks field (found at the end of the record) of the SDN file. In
these formats, weak AKAs are bracketed by quotation marks.
8219@"ALLANE, Hacene"@"individual"@"SDGT"@-0- @-0- @-0- @-0- @-0- @-0-
@-0- @"DOB 17 Jan 1941; POB El Menea, Algeria; a.k.a. 'ABU
AL-FOUTOUH'; a.k.a. 'BOULAHIA'; a.k.a. 'HASSAN THE OLD'."
In the XML version of the SDN List, there is a Type element for each
AKA. The Type can either be 'weak' or 'strong' (see the XML SDN
Schema (XSD file) at:
http://www.treasury.gov/resource-center/sanctions/SDN-List/Documents/sdn.xsd for more
information).
Am I required to screen for weak aliases (AKAs)?
OFAC's regulations do not explicitly require any specific screening
regime. Financial institutions and others must make screening choices
based on their circumstances and compliance approach. As a general
matter, though, OFAC does not expect that persons will screen for weak
AKAs, but expects that such AKAs may be used to help determine whether
a "hit" arising from other information is accurate.
Will I be penalized for processing an unauthorized transaction
involving a weak alias (AKA)?
A person who processes an unauthorized transaction involving an SDN
has violated U.S. law and may be subject to an enforcement action.
Generally speaking, however, if (i) the only sanctions reference in
the transaction is a weak AKA, (ii) the person involved in the
processing had no other reason to know that the transaction involved
an SDN or was otherwise in violation of U.S. law, and (iii) the person
maintains a rigorous risk-based compliance program, OFAC will not
issue a civil penalty against an individual or entity for processing
such a transaction.

View File

@@ -4,12 +4,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"SDN OFAC csv file"
"### SDN List : Data Processing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 490,
"metadata": {},
"outputs": [
{
@@ -152,7 +152,7 @@
"4 -0- -0- -0- -0- -0- -0- "
]
},
"execution_count": 2,
"execution_count": 490,
"metadata": {},
"output_type": "execute_result"
}
@@ -170,7 +170,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 491,
"metadata": {},
"outputs": [
{
@@ -182,7 +182,7 @@
" dtype='object')"
]
},
"execution_count": 3,
"execution_count": 491,
"metadata": {},
"output_type": "execute_result"
}
@@ -206,16 +206,20 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 492,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['ent_num', 'SDN_name', 'SDN_type', 'Remarks', 'country'], dtype='object')"
"-0- 7252\n",
"individual 6915\n",
"vessel 861\n",
"aircraft 374\n",
"Name: SDN_type, dtype: int64"
]
},
"execution_count": 4,
"execution_count": 492,
"metadata": {},
"output_type": "execute_result"
}
@@ -225,131 +229,53 @@
" 'Vess_type', 'Tonnage', 'GRT', 'Vess_flag', 'Vess_owner']\n",
"columns_to_drop = [col_name for col_name in col if col_name in result_df.columns]\n",
"result_df.drop(columns=columns_to_drop, inplace=True)\n",
"result_df.columns"
"result_df['SDN_type'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 493,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"49\n"
]
}
],
"source": [
"\n",
"# ETH ADDRESSES \n",
"# TODO : Get bitcoin and othe addresses as well \n",
"pattern = r'ETH\\s+(0x[0-9a-fA-F]{40})(?=[\\s;])'\n",
"def extract_eth_addresses(remark):\n",
" if isinstance(remark, str):\n",
" return re.findall(pattern, remark)\n",
" return []\n",
"\n",
"eth_addresses = result_df['Remarks'].apply(extract_eth_addresses).explode().dropna().tolist()\n",
"eth_addresses_dict = [{'Eth_address': addr} for addr in eth_addresses]\n",
"print(len(eth_addresses))\n",
"json_result = json.dumps(eth_addresses_dict, indent=4)\n",
"with open('eth_addresses.json', 'w') as f:\n",
" f.write(json_result)\n"
]
},
{
"cell_type": "code",
"execution_count": 494,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-0- 7252\n",
"individual 6915\n",
"vessel 861\n",
"aircraft 374\n",
"Name: SDN_type, dtype: int64\n",
"Cleaned individual 6915\n",
"Name: SDN_type, dtype: int64\n"
]
}
],
"source": [
"print(result_df['SDN_type'].value_counts())\n",
"result_df = result_df[result_df['SDN_type'] == 'individual']\n",
"print(\"Cleaned\",result_df['SDN_type'].value_counts())\n",
"result_df.drop(columns=\"SDN_type\", inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Individual have proper names and some vessels are right name, but most of them are not. Aircrafts seem to be aircraft manufacturers and have codes in names, hence not of any use. -0- seems to be names of company or groups, hence disregarded."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6915"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(result_df) #total individuals"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6855\n",
"5924\n"
]
}
],
"source": [
"# DOB\n",
"\n",
"result_df['Remarks'] = result_df['Remarks'].str.lower() \n",
"# for format dd mmm yyyy\n",
"result_df['DOB'] = result_df['Remarks'].str.extract(r'(\\d{2} \\w{3} \\d{4})')\n",
"result_df['day'] = result_df['DOB'].str.extract(r'(\\d{2})')\n",
"result_df['month'] = result_df['DOB'].str.extract(r'(\\w{3})')\n",
"result_df['year'] = result_df['DOB'].str.extract(r'(\\d{4})')\n",
"# for yyyy only format\n",
"result_df['year'] = result_df['Remarks'].str.extract(r'(\\d{4})')\n",
"result_df.head()\n",
"\n",
"print(result_df['year'].count()) # total individuals with at least year in dob\n",
"print(result_df['DOB'].count()) # total individuals with whole dob\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4325"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# GENDER\n",
"def extract_gender(text):\n",
" pattern = r'gender (male|female)'\n",
" match = re.search(pattern, text)\n",
" if match:\n",
" return match.group(1)\n",
" else:\n",
" return None\n",
" \n",
"# Apply the function to extract the gender\n",
"result_df[\"Gender\"] = result_df['Remarks'].apply(extract_gender)\n",
"result_df.head()\n",
"result_df[\"Gender\"].count() \n",
"# result_df[result_df['ent_num'] == \"12610\"]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
},
{
"data": {
"text/html": [
@@ -375,50 +301,169 @@
" <th>SDN_name</th>\n",
" <th>Remarks</th>\n",
" <th>country</th>\n",
" <th>DOB</th>\n",
" <th>day</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>Gender</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2096</th>\n",
" <td>12599</td>\n",
" <td>PATEK, Umar</td>\n",
" <td>dob 20 jul 1966; pob central java, indonesia; ...</td>\n",
" <th>53</th>\n",
" <td>2674</td>\n",
" <td>ABBAS, Abu</td>\n",
" <td>DOB 10 Dec 1948; Director of PALESTINE LIBERAT...</td>\n",
" <td>-0-</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>2675</td>\n",
" <td>AL RAHMAN, Shaykh Umar Abd</td>\n",
" <td>DOB 03 May 1938; POB Egypt; Chief Ideological ...</td>\n",
" <td>-0-</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>2676</td>\n",
" <td>AL ZAWAHIRI, Dr. Ayman</td>\n",
" <td>DOB 19 Jun 1951; POB Giza, Egypt; Passport 108...</td>\n",
" <td>-0-</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>2677</td>\n",
" <td>AL-ZOMOR, Abboud Abdul Latif Hassan</td>\n",
" <td>DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio...</td>\n",
" <td>Egypt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>2678</td>\n",
" <td>AWDA, Abd Al Aziz</td>\n",
" <td>DOB 1946; Chief Ideological Figure of PALESTIN...</td>\n",
" <td>-0-</td>\n",
" <td>20 jul 1966</td>\n",
" <td>20</td>\n",
" <td>jul</td>\n",
" <td>1966</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ent_num SDN_name Remarks \\\n",
"2096 12599 PATEK, Umar dob 20 jul 1966; pob central java, indonesia; ... \n",
" ent_num SDN_name \\\n",
"53 2674 ABBAS, Abu \n",
"54 2675 AL RAHMAN, Shaykh Umar Abd \n",
"55 2676 AL ZAWAHIRI, Dr. Ayman \n",
"56 2677 AL-ZOMOR, Abboud Abdul Latif Hassan \n",
"57 2678 AWDA, Abd Al Aziz \n",
"\n",
" country DOB day month year Gender \n",
"2096 -0- 20 jul 1966 20 jul 1966 None "
" Remarks country \n",
"53 DOB 10 Dec 1948; Director of PALESTINE LIBERAT... -0- \n",
"54 DOB 03 May 1938; POB Egypt; Chief Ideological ... -0- \n",
"55 DOB 19 Jun 1951; POB Giza, Egypt; Passport 108... -0- \n",
"56 DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio... Egypt \n",
"57 DOB 1946; Chief Ideological Figure of PALESTIN... -0- "
]
},
"execution_count": 9,
"execution_count": 494,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_df[result_df['ent_num'] == \"12599\"]"
"result_df = result_df[result_df['SDN_type'] == 'individual']\n",
"print(\"Cleaned\",result_df['SDN_type'].value_counts())\n",
"result_df.drop(columns=\"SDN_type\", inplace=True)\n",
"result_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Individual have proper names and some vessels are right name, but most of them are not. Aircrafts seem to be aircraft manufacturers and have codes in names, hence not of any use. -0- seems to be names of company or groups, hence disregarded."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 495,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6915"
]
},
"execution_count": 495,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(result_df) #total individuals"
]
},
{
"cell_type": "code",
"execution_count": 496,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6855\n",
"5924\n"
]
}
],
"source": [
"# DOB\n",
"\n",
"# for format dd mmm yyyy\n",
"result_df['DOB'] = result_df['Remarks'].str.extract(r'(\\d{2} \\w{3} \\d{4})')\n",
"result_df['day'] = result_df['DOB'].str.extract(r'(\\d{2})')\n",
"result_df['month'] = result_df['DOB'].str.extract(r'(\\w{3})')\n",
"result_df['year'] = result_df['DOB'].str.extract(r'(\\d{4})')\n",
"# for yyyy only format\n",
"result_df['year'] = result_df['Remarks'].str.extract(r'(\\d{4})')\n",
"result_df.head()\n",
"\n",
"print(result_df['year'].count()) # total individuals with at least year in dob\n",
"print(result_df['DOB'].count()) # total individuals with whole dob\n"
]
},
{
"cell_type": "code",
"execution_count": 497,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 497,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# GENDER\n",
"def extract_gender(text):\n",
" pattern = r'gender (male|female)'\n",
" match = re.search(pattern, text)\n",
" if match:\n",
" return match.group(1)\n",
" else:\n",
" return None\n",
" \n",
"# Apply the function to extract the gender\n",
"result_df[\"Gender\"] = result_df['Remarks'].apply(extract_gender)\n",
"result_df.head()\n",
"result_df[\"Gender\"].count() \n",
"# result_df[result_df['ent_num'] == \"12610\"]"
]
},
{
"cell_type": "code",
"execution_count": 498,
"metadata": {},
"outputs": [
{
@@ -460,16 +505,16 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 499,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1549"
"0"
]
},
"execution_count": 11,
"execution_count": 499,
"metadata": {},
"output_type": "execute_result"
}
@@ -491,63 +536,282 @@
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pattern = r'eth (0x[0-9a-fA-F]+);'\n",
"\n",
"def extract_eth_address(remark):\n",
" match = re.search(pattern, remark)\n",
" if match:\n",
" return match.group(1)\n",
" else:\n",
" return None\n",
"\n",
"result_df['Eth_address'] = result_df['Remarks'].apply(extract_eth_address)\n",
"\n",
"# Filter rows where 'eth_address' is not null\n",
"filtered_df = result_df.dropna(subset=['Eth_address'])\n",
"filtered_df['Eth_address'].count() # total individuals with eth address"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['ent_num', 'SDN_name', 'Remarks', 'country', 'DOB', 'day', 'month',\n",
" 'year', 'Gender', 'Citizen', 'Nationality', 'Pass_No', 'Pass_Country',\n",
" 'Eth_address'],\n",
" dtype='object')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 500,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ent_num</th>\n",
" <th>SDN_name</th>\n",
" <th>Remarks</th>\n",
" <th>country</th>\n",
" <th>DOB</th>\n",
" <th>day</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>Gender</th>\n",
" <th>Citizen</th>\n",
" <th>Nationality</th>\n",
" <th>Pass_No</th>\n",
" <th>Pass_Country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>2674</td>\n",
" <td>ABBAS, Abu</td>\n",
" <td>DOB 10 Dec 1948; Director of PALESTINE LIBERAT...</td>\n",
" <td>-0-</td>\n",
" <td>10 Dec 1948</td>\n",
" <td>10</td>\n",
" <td>Dec</td>\n",
" <td>1948</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>2675</td>\n",
" <td>AL RAHMAN, Shaykh Umar Abd</td>\n",
" <td>DOB 03 May 1938; POB Egypt; Chief Ideological ...</td>\n",
" <td>-0-</td>\n",
" <td>03 May 1938</td>\n",
" <td>03</td>\n",
" <td>May</td>\n",
" <td>1938</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>2676</td>\n",
" <td>AL ZAWAHIRI, Dr. Ayman</td>\n",
" <td>DOB 19 Jun 1951; POB Giza, Egypt; Passport 108...</td>\n",
" <td>-0-</td>\n",
" <td>19 Jun 1951</td>\n",
" <td>19</td>\n",
" <td>Jun</td>\n",
" <td>1951</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>2677</td>\n",
" <td>AL-ZOMOR, Abboud Abdul Latif Hassan</td>\n",
" <td>DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio...</td>\n",
" <td>Egypt</td>\n",
" <td>19 Apr 1947</td>\n",
" <td>19</td>\n",
" <td>Apr</td>\n",
" <td>1947</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>Egypt</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>2678</td>\n",
" <td>AWDA, Abd Al Aziz</td>\n",
" <td>DOB 1946; Chief Ideological Figure of PALESTIN...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1946</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>2679</td>\n",
" <td>FADLALLAH, Shaykh Muhammad Husayn</td>\n",
" <td>DOB 1938; alt. DOB 1936; POB Najf Al Ashraf (N...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1938</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>2681</td>\n",
" <td>HAWATMA, Nayif</td>\n",
" <td>DOB 1933; Secretary General of DEMOCRATIC FRON...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1933</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>2682</td>\n",
" <td>ISLAMBOULI, Mohammad Shawqi</td>\n",
" <td>DOB 15 Jan 1955; POB Egypt; Passport 304555 (E...</td>\n",
" <td>-0-</td>\n",
" <td>15 Jan 1955</td>\n",
" <td>15</td>\n",
" <td>Jan</td>\n",
" <td>1955</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>2683</td>\n",
" <td>JABRIL, Ahmad</td>\n",
" <td>DOB 1938; POB Ramleh, Israel; Secretary Genera...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1938</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>2685</td>\n",
" <td>NAJI, Talal Muhammad Rashid</td>\n",
" <td>DOB 1930; POB Al Nasiria, Palestine; Principal...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1930</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ent_num SDN_name \\\n",
"53 2674 ABBAS, Abu \n",
"54 2675 AL RAHMAN, Shaykh Umar Abd \n",
"55 2676 AL ZAWAHIRI, Dr. Ayman \n",
"56 2677 AL-ZOMOR, Abboud Abdul Latif Hassan \n",
"57 2678 AWDA, Abd Al Aziz \n",
"58 2679 FADLALLAH, Shaykh Muhammad Husayn \n",
"59 2681 HAWATMA, Nayif \n",
"60 2682 ISLAMBOULI, Mohammad Shawqi \n",
"61 2683 JABRIL, Ahmad \n",
"62 2685 NAJI, Talal Muhammad Rashid \n",
"\n",
" Remarks country DOB \\\n",
"53 DOB 10 Dec 1948; Director of PALESTINE LIBERAT... -0- 10 Dec 1948 \n",
"54 DOB 03 May 1938; POB Egypt; Chief Ideological ... -0- 03 May 1938 \n",
"55 DOB 19 Jun 1951; POB Giza, Egypt; Passport 108... -0- 19 Jun 1951 \n",
"56 DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio... Egypt 19 Apr 1947 \n",
"57 DOB 1946; Chief Ideological Figure of PALESTIN... -0- NaN \n",
"58 DOB 1938; alt. DOB 1936; POB Najf Al Ashraf (N... -0- NaN \n",
"59 DOB 1933; Secretary General of DEMOCRATIC FRON... -0- NaN \n",
"60 DOB 15 Jan 1955; POB Egypt; Passport 304555 (E... -0- 15 Jan 1955 \n",
"61 DOB 1938; POB Ramleh, Israel; Secretary Genera... -0- NaN \n",
"62 DOB 1930; POB Al Nasiria, Palestine; Principal... -0- NaN \n",
"\n",
" day month year Gender Citizen Nationality Pass_No Pass_Country \n",
"53 10 Dec 1948 None None None None None \n",
"54 03 May 1938 None None None None None \n",
"55 19 Jun 1951 None None None None None \n",
"56 19 Apr 1947 None None Egypt None None \n",
"57 NaN NaN 1946 None None None None None \n",
"58 NaN NaN 1938 None None None None None \n",
"59 NaN NaN 1933 None None None None None \n",
"60 15 Jan 1955 None None None None None \n",
"61 NaN NaN 1938 None None None None None \n",
"62 NaN NaN 1930 None None None None None "
]
},
"execution_count": 500,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 501,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['ent_num', 'SDN_name', 'Remarks', 'country', 'DOB', 'day', 'month',\n",
" 'year', 'Gender', 'Citizen', 'Nationality', 'Pass_No', 'Pass_Country'],\n",
" dtype='object')"
]
},
"execution_count": 501,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 502,
"metadata": {},
"outputs": [
{
@@ -584,7 +848,6 @@
" <th>Nationality</th>\n",
" <th>Pass_No</th>\n",
" <th>Pass_Country</th>\n",
" <th>Eth_address</th>\n",
" <th>Last_Name</th>\n",
" <th>First_Name</th>\n",
" </tr>\n",
@@ -594,18 +857,17 @@
" <th>53</th>\n",
" <td>2674</td>\n",
" <td>ABBAS, Abu</td>\n",
" <td>dob 10 dec 1948; director of palestine liberat...</td>\n",
" <td>DOB 10 Dec 1948; Director of PALESTINE LIBERAT...</td>\n",
" <td>-0-</td>\n",
" <td>10 dec 1948</td>\n",
" <td>10 Dec 1948</td>\n",
" <td>10</td>\n",
" <td>dec</td>\n",
" <td>Dec</td>\n",
" <td>1948</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>ABBAS</td>\n",
" <td>ABU</td>\n",
" </tr>\n",
@@ -613,18 +875,17 @@
" <th>54</th>\n",
" <td>2675</td>\n",
" <td>AL RAHMAN, Shaykh Umar Abd</td>\n",
" <td>dob 03 may 1938; pob egypt; chief ideological ...</td>\n",
" <td>DOB 03 May 1938; POB Egypt; Chief Ideological ...</td>\n",
" <td>-0-</td>\n",
" <td>03 may 1938</td>\n",
" <td>03 May 1938</td>\n",
" <td>03</td>\n",
" <td>may</td>\n",
" <td>May</td>\n",
" <td>1938</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AL RAHMAN</td>\n",
" <td>SHAYKH UMAR ABD</td>\n",
" </tr>\n",
@@ -632,17 +893,16 @@
" <th>55</th>\n",
" <td>2676</td>\n",
" <td>AL ZAWAHIRI, Dr. Ayman</td>\n",
" <td>dob 19 jun 1951; pob giza, egypt; passport 108...</td>\n",
" <td>DOB 19 Jun 1951; POB Giza, Egypt; Passport 108...</td>\n",
" <td>-0-</td>\n",
" <td>19 jun 1951</td>\n",
" <td>19 Jun 1951</td>\n",
" <td>19</td>\n",
" <td>jun</td>\n",
" <td>Jun</td>\n",
" <td>1951</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>1084010</td>\n",
" <td>egypt</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AL ZAWAHIRI</td>\n",
" <td>DR. AYMAN</td>\n",
@@ -651,16 +911,15 @@
" <th>56</th>\n",
" <td>2677</td>\n",
" <td>AL-ZOMOR, Abboud Abdul Latif Hassan</td>\n",
" <td>dob 19 apr 1947; pob nahia, giza, egypt; natio...</td>\n",
" <td>DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio...</td>\n",
" <td>Egypt</td>\n",
" <td>19 apr 1947</td>\n",
" <td>19 Apr 1947</td>\n",
" <td>19</td>\n",
" <td>apr</td>\n",
" <td>Apr</td>\n",
" <td>1947</td>\n",
" <td>male</td>\n",
" <td>None</td>\n",
" <td>egypt</td>\n",
" <td>None</td>\n",
" <td>Egypt</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AL-ZOMOR</td>\n",
@@ -670,7 +929,7 @@
" <th>57</th>\n",
" <td>2678</td>\n",
" <td>AWDA, Abd Al Aziz</td>\n",
" <td>dob 1946; chief ideological figure of palestin...</td>\n",
" <td>DOB 1946; Chief Ideological Figure of PALESTIN...</td>\n",
" <td>-0-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
@@ -681,7 +940,6 @@
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AWDA</td>\n",
" <td>ABD AL AZIZ</td>\n",
" </tr>\n",
@@ -698,28 +956,28 @@
"57 2678 AWDA, Abd Al Aziz \n",
"\n",
" Remarks country DOB \\\n",
"53 dob 10 dec 1948; director of palestine liberat... -0- 10 dec 1948 \n",
"54 dob 03 may 1938; pob egypt; chief ideological ... -0- 03 may 1938 \n",
"55 dob 19 jun 1951; pob giza, egypt; passport 108... -0- 19 jun 1951 \n",
"56 dob 19 apr 1947; pob nahia, giza, egypt; natio... Egypt 19 apr 1947 \n",
"57 dob 1946; chief ideological figure of palestin... -0- NaN \n",
"53 DOB 10 Dec 1948; Director of PALESTINE LIBERAT... -0- 10 Dec 1948 \n",
"54 DOB 03 May 1938; POB Egypt; Chief Ideological ... -0- 03 May 1938 \n",
"55 DOB 19 Jun 1951; POB Giza, Egypt; Passport 108... -0- 19 Jun 1951 \n",
"56 DOB 19 Apr 1947; POB Nahia, Giza, Egypt; natio... Egypt 19 Apr 1947 \n",
"57 DOB 1946; Chief Ideological Figure of PALESTIN... -0- NaN \n",
"\n",
" day month year Gender Citizen Nationality Pass_No Pass_Country \\\n",
"53 10 dec 1948 None None None None None \n",
"54 03 may 1938 None None None None None \n",
"55 19 jun 1951 None None None 1084010 egypt \n",
"56 19 apr 1947 male None egypt None None \n",
"57 NaN NaN 1946 None None None None None \n",
" day month year Gender Citizen Nationality Pass_No Pass_Country \\\n",
"53 10 Dec 1948 None None None None None \n",
"54 03 May 1938 None None None None None \n",
"55 19 Jun 1951 None None None None None \n",
"56 19 Apr 1947 None None Egypt None None \n",
"57 NaN NaN 1946 None None None None None \n",
"\n",
" Eth_address Last_Name First_Name \n",
"53 None ABBAS ABU \n",
"54 None AL RAHMAN SHAYKH UMAR ABD \n",
"55 None AL ZAWAHIRI DR. AYMAN \n",
"56 None AL-ZOMOR ABBOUD ABDUL LATIF HASSAN \n",
"57 None AWDA ABD AL AZIZ "
" Last_Name First_Name \n",
"53 ABBAS ABU \n",
"54 AL RAHMAN SHAYKH UMAR ABD \n",
"55 AL ZAWAHIRI DR. AYMAN \n",
"56 AL-ZOMOR ABBOUD ABDUL LATIF HASSAN \n",
"57 AWDA ABD AL AZIZ "
]
},
"execution_count": 14,
"execution_count": 502,
"metadata": {},
"output_type": "execute_result"
}
@@ -729,13 +987,12 @@
"result_df[['Last_Name', 'First_Name']] = result_df['SDN_name'].str.split(', ', expand=True, n=1)\n",
"result_df['Last_Name'] = result_df['Last_Name'].str.upper()\n",
"result_df['First_Name'] = result_df['First_Name'].str.upper()\n",
"\n",
"result_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 503,
"metadata": {},
"outputs": [],
"source": [
@@ -744,18 +1001,12 @@
"\n",
"filtered_df = result_df.dropna(subset=['Pass_No', 'Pass_Country'], how='all')\n",
"passport_df = filtered_df[['Pass_No', 'Pass_Country']]\n",
"filtered_df = result_df.dropna(subset=['Eth_address'], how='all')\n",
"eth_df = filtered_df[['Eth_address']]\n",
"name_dob = result_df[['First_Name', 'Last_Name', 'day', 'month', 'year']]\n",
"\n",
"passport_list = passport_df.to_dict(orient='records')\n",
"with open('passports.json', 'w') as json_file:\n",
" json.dump(passport_list, json_file, indent=4)\n",
"\n",
"etherum_list = eth_df.to_dict(orient='records')\n",
"with open('etherum_add.json','w') as json_file:\n",
" json.dump(etherum_list, json_file, indent=4)\n",
"\n",
" \n",
"name_list = name_dob.to_dict(orient='records')\n",
"with open('names.json', 'w') as json_file:\n",
" json.dump(name_list, json_file, indent=4)"

View File

@@ -10,7 +10,7 @@ import {
import { LeanIMT } from "@zk-kit/lean-imt";
import { IMT } from "@zk-kit/imt";
import { getLeaf } from "./pubkeyTree";
import { getPassportleaf } from "./passportTree";
import { getPassportNumberleaf } from "./ofacTree";
import serializedTree from "../../pubkeys/serialized_tree.json";
import { poseidon2, poseidon6 } from "poseidon-lite";
import { packBytes } from "../utils/utils";
@@ -168,7 +168,7 @@ export function generateCircuitInputsOfac(
) {
const mrz_bytes = formatMrz(passportData.mrz);
const passport_leaf = getPassportleaf(mrz_bytes.slice(49,58))
const passport_leaf = getPassportNumberleaf(mrz_bytes.slice(49,58))
const {root, depth, closestleaf, indices, exSiblings, membership} = generateSMTProof(merkletree, passport_leaf);
let exists = membership ? 1 : 0;

View File

@@ -8,7 +8,7 @@ import * as fs from 'fs';
// 2. Names and dob combo tree : level 2 (High Probability Match)
// 3. Names tree : level 1 (Partial Match)
export function passport_smt(): [SMT,SMT,SMT] {
export function ofac_smt(): [SMT,SMT,SMT] {
let startTime = performance.now();
//Path wrt where it is called from, i.e circuits. Replace when export and import through json
@@ -75,7 +75,7 @@ function processPassport(passno : string, index: number): bigint {
}
}
const leaf = getPassportleaf(stringToAsciiBigIntArray(passno))
const leaf = getPassportNumberleaf(stringToAsciiBigIntArray(passno))
if (!leaf) {
console.log('Error creating leaf value', index, passno)
return BigInt(0)
@@ -148,7 +148,7 @@ function processDob(day: string, month: string, year: string, i : number): bigin
return getDobLeaf(arr,i)
}
export function getPassportleaf(passport: (bigint|number)[], i?: number): bigint {
export function getPassportNumberleaf(passport: (bigint|number)[], i?: number): bigint {
if (passport.length !== 9) {
console.log('parsed passport length is not 9:', i, passport)
return

File diff suppressed because one or more lines are too long