Mirror of https://github.com/JHUAPL/PINE.git (synced 2026-01-08 20:17:54 -05:00)

Merge pull request #61 from jhuapl-lglenden/develop-updates

Updates from latest development branch.
.env (1 line changed)
@@ -10,6 +10,7 @@ EVE_DB_VOLUME=eve_db
 OPENNLP_ID=5babb6ee4eb7dd2c39b9671c
 CORENLP_ID=5babb6ee4eb7dd2c39b9671d
 DOCUMENT_CLASSIFIER_ID=5babb6ee4eb7dd2c39b9671b
+SIMPLETRANSFORMERS_ID=5babb6ee4eb7dd2c39b96720

 EXPOSED_SERVER_TYPE=https
 EXPOSED_SERVER_NAME=localhost
@@ -25,6 +25,7 @@ scipy = "~=1.7.1"
 tabulate = "~=0.8.9"
 multiprocessing-logging = "~=0.3.1"
 flask-httpauth = "~=4.4.0"
+lxml = "~=4.6.3"

 [dev-packages]
backend/Pipfile.lock (generated, 322 lines changed)
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3a30a022ccd4fbe028c8cf2c8f741b9c7f7fa72e039dba391da62a20e58c5273"
+            "sha256": "382dbc37f5349e1a9d22b266891cad743be81ff76fe395c112c157b6a110ed62"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -55,10 +55,10 @@
     },
     "certifi": {
         "hashes": [
-            "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
-            "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
+            "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
+            "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
         ],
-        "version": "==2021.5.30"
+        "version": "==2021.10.8"
     },
     "cffi": {
         "hashes": [
@@ -112,39 +112,45 @@
     },
     "charset-normalizer": {
         "hashes": [
-            "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
-            "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
+            "sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
+            "sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
         ],
         "markers": "python_version >= '3'",
-        "version": "==2.0.4"
+        "version": "==2.0.7"
     },
     "click": {
         "hashes": [
-            "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a",
-            "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"
+            "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3",
+            "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"
         ],
         "markers": "python_version >= '3.6'",
-        "version": "==8.0.1"
+        "version": "==8.0.3"
     },
     "cryptography": {
        "hashes": [
-            "sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d",
-            "sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959",
-            "sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6",
-            "sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873",
-            "sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2",
-            "sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713",
-            "sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1",
-            "sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177",
-            "sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250",
-            "sha256:b01fd6f2737816cb1e08ed4807ae194404790eac7ad030b34f2ce72b332f5586",
-            "sha256:bf40af59ca2465b24e54f671b2de2c59257ddc4f7e5706dbd6930e26823668d3",
-            "sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca",
-            "sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d",
-            "sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9"
+            "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
+            "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
+            "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
+            "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
+            "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
+            "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
+            "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
+            "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
+            "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
+            "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
+            "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
+            "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
+            "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
+            "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
+            "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
+            "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
+            "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
+            "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
+            "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
+            "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
         ],
         "markers": "python_version >= '3.6'",
-        "version": "==3.4.7"
+        "version": "==35.0.0"
     },
     "cycler": {
         "hashes": [
@@ -155,11 +161,11 @@
     },
     "flask": {
         "hashes": [
-            "sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55",
-            "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"
+            "sha256:7b2fb8e934ddd50731893bdcdb00fc8c0315916f9fcd50d22c7cc1a95ab634e2",
+            "sha256:cb90f62f1d8e4dc4621f52106613488b5ba826b2e1e10a33eac92f723093ab6a"
         ],
         "index": "pypi",
-        "version": "==2.0.1"
+        "version": "==2.0.2"
     },
     "flask-cors": {
         "hashes": [
@@ -195,49 +201,115 @@
     },
     "jinja2": {
         "hashes": [
-            "sha256:1f06f2da51e7b56b8f238affdd6b4e2c61e39598a378cc49345bc1bd42a978a4",
-            "sha256:703f484b47a6af502e743c9122595cc812b0271f661722403114f71a79d0f5a4"
+            "sha256:827a0e32839ab1600d4eb1c4c33ec5a8edfbc5cb42dafa13b81f182f97784b45",
+            "sha256:8569982d3f0889eed11dd620c706d39b60c36d6d25843961f33f77fb6bc6b20c"
         ],
         "markers": "python_version >= '3.6'",
-        "version": "==3.0.1"
+        "version": "==3.0.2"
     },
     "kiwisolver": {
         "hashes": [
-            "sha256:0cd53f403202159b44528498de18f9285b04482bab2a6fc3f5dd8dbb9352e30d",
-            "sha256:1e1bc12fb773a7b2ffdeb8380609f4f8064777877b2225dec3da711b421fda31",
-            "sha256:225e2e18f271e0ed8157d7f4518ffbf99b9450fca398d561eb5c4a87d0986dd9",
-            "sha256:232c9e11fd7ac3a470d65cd67e4359eee155ec57e822e5220322d7b2ac84fbf0",
-            "sha256:31dfd2ac56edc0ff9ac295193eeaea1c0c923c0355bf948fbd99ed6018010b72",
-            "sha256:33449715e0101e4d34f64990352bce4095c8bf13bed1b390773fc0a7295967b3",
-            "sha256:401a2e9afa8588589775fe34fc22d918ae839aaaf0c0e96441c0fdbce6d8ebe6",
-            "sha256:44a62e24d9b01ba94ae7a4a6c3fb215dc4af1dde817e7498d901e229aaf50e4e",
-            "sha256:50af681a36b2a1dee1d3c169ade9fdc59207d3c31e522519181e12f1b3ba7000",
-            "sha256:563c649cfdef27d081c84e72a03b48ea9408c16657500c312575ae9d9f7bc1c3",
-            "sha256:5989db3b3b34b76c09253deeaf7fbc2707616f130e166996606c284395da3f18",
-            "sha256:5a7a7dbff17e66fac9142ae2ecafb719393aaee6a3768c9de2fd425c63b53e21",
-            "sha256:5c3e6455341008a054cccee8c5d24481bcfe1acdbc9add30aa95798e95c65621",
-            "sha256:5f6ccd3dd0b9739edcf407514016108e2280769c73a85b9e59aa390046dbf08b",
-            "sha256:72c99e39d005b793fb7d3d4e660aed6b6281b502e8c1eaf8ee8346023c8e03bc",
-            "sha256:78751b33595f7f9511952e7e60ce858c6d64db2e062afb325985ddbd34b5c131",
-            "sha256:834ee27348c4aefc20b479335fd422a2c69db55f7d9ab61721ac8cd83eb78882",
-            "sha256:8be8d84b7d4f2ba4ffff3665bcd0211318aa632395a1a41553250484a871d454",
-            "sha256:950a199911a8d94683a6b10321f9345d5a3a8433ec58b217ace979e18f16e248",
-            "sha256:a357fd4f15ee49b4a98b44ec23a34a95f1e00292a139d6015c11f55774ef10de",
-            "sha256:a53d27d0c2a0ebd07e395e56a1fbdf75ffedc4a05943daf472af163413ce9598",
-            "sha256:acef3d59d47dd85ecf909c359d0fd2c81ed33bdff70216d3956b463e12c38a54",
-            "sha256:b38694dcdac990a743aa654037ff1188c7a9801ac3ccc548d3341014bc5ca278",
-            "sha256:b9edd0110a77fc321ab090aaa1cfcaba1d8499850a12848b81be2222eab648f6",
-            "sha256:c08e95114951dc2090c4a630c2385bef681cacf12636fb0241accdc6b303fd81",
-            "sha256:c5518d51a0735b1e6cee1fdce66359f8d2b59c3ca85dc2b0813a8aa86818a030",
-            "sha256:c8fd0f1ae9d92b42854b2979024d7597685ce4ada367172ed7c09edf2cef9cb8",
-            "sha256:ca3820eb7f7faf7f0aa88de0e54681bddcb46e485beb844fcecbcd1c8bd01689",
-            "sha256:cf8b574c7b9aa060c62116d4181f3a1a4e821b2ec5cbfe3775809474113748d4",
-            "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0",
-            "sha256:f8d6f8db88049a699817fd9178782867bf22283e3813064302ac59f61d95be05",
-            "sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9"
+            "sha256:0007840186bacfaa0aba4466d5890334ea5938e0bb7e28078a0eb0e63b5b59d5",
+            "sha256:19554bd8d54cf41139f376753af1a644b63c9ca93f8f72009d50a2080f870f77",
+            "sha256:1d45d1c74f88b9f41062716c727f78f2a59a5476ecbe74956fafb423c5c87a76",
+            "sha256:1d819553730d3c2724582124aee8a03c846ec4362ded1034c16fb3ef309264e6",
+            "sha256:2210f28778c7d2ee13f3c2a20a3a22db889e75f4ec13a21072eabb5693801e84",
+            "sha256:22521219ca739654a296eea6d4367703558fba16f98688bd8ce65abff36eaa84",
+            "sha256:25405f88a37c5f5bcba01c6e350086d65e7465fd1caaf986333d2a045045a223",
+            "sha256:2b65bd35f3e06a47b5c30ea99e0c2b88f72c6476eedaf8cfbc8e66adb5479dcf",
+            "sha256:2ddb500a2808c100e72c075cbb00bf32e62763c82b6a882d403f01a119e3f402",
+            "sha256:2f8f6c8f4f1cff93ca5058d6ec5f0efda922ecb3f4c5fb76181f327decff98b8",
+            "sha256:30fa008c172355c7768159983a7270cb23838c4d7db73d6c0f6b60dde0d432c6",
+            "sha256:3dbb3cea20b4af4f49f84cffaf45dd5f88e8594d18568e0225e6ad9dec0e7967",
+            "sha256:4116ba9a58109ed5e4cb315bdcbff9838f3159d099ba5259c7c7fb77f8537492",
+            "sha256:44e6adf67577dbdfa2d9f06db9fbc5639afefdb5bf2b4dfec25c3a7fbc619536",
+            "sha256:5326ddfacbe51abf9469fe668944bc2e399181a2158cb5d45e1d40856b2a0589",
+            "sha256:70adc3658138bc77a36ce769f5f183169bc0a2906a4f61f09673f7181255ac9b",
+            "sha256:72be6ebb4e92520b9726d7146bc9c9b277513a57a38efcf66db0620aec0097e0",
+            "sha256:7843b1624d6ccca403a610d1277f7c28ad184c5aa88a1750c1a999754e65b439",
+            "sha256:7ba5a1041480c6e0a8b11a9544d53562abc2d19220bfa14133e0cdd9967e97af",
+            "sha256:80efd202108c3a4150e042b269f7c78643420cc232a0a771743bb96b742f838f",
+            "sha256:82f49c5a79d3839bc8f38cb5f4bfc87e15f04cbafa5fbd12fb32c941cb529cfb",
+            "sha256:83d2c9db5dfc537d0171e32de160461230eb14663299b7e6d18ca6dca21e4977",
+            "sha256:8d93a1095f83e908fc253f2fb569c2711414c0bfd451cab580466465b235b470",
+            "sha256:8dc3d842fa41a33fe83d9f5c66c0cc1f28756530cd89944b63b072281e852031",
+            "sha256:9661a04ca3c950a8ac8c47f53cbc0b530bce1b52f516a1e87b7736fec24bfff0",
+            "sha256:a498bcd005e8a3fedd0022bb30ee0ad92728154a8798b703f394484452550507",
+            "sha256:a7a4cf5bbdc861987a7745aed7a536c6405256853c94abc9f3287c3fa401b174",
+            "sha256:b5074fb09429f2b7bc82b6fb4be8645dcbac14e592128beeff5461dcde0af09f",
+            "sha256:b6a5431940f28b6de123de42f0eb47b84a073ee3c3345dc109ad550a3307dd28",
+            "sha256:ba677bcaff9429fd1bf01648ad0901cea56c0d068df383d5f5856d88221fe75b",
+            "sha256:bcadb05c3d4794eb9eee1dddf1c24215c92fb7b55a80beae7a60530a91060560",
+            "sha256:bf7eb45d14fc036514c09554bf983f2a72323254912ed0c3c8e697b62c4c158f",
+            "sha256:c358721aebd40c243894298f685a19eb0491a5c3e0b923b9f887ef1193ddf829",
+            "sha256:c4550a359c5157aaf8507e6820d98682872b9100ce7607f8aa070b4b8af6c298",
+            "sha256:c6572c2dab23c86a14e82c245473d45b4c515314f1f859e92608dcafbd2f19b8",
+            "sha256:cba430db673c29376135e695c6e2501c44c256a81495da849e85d1793ee975ad",
+            "sha256:dedc71c8eb9c5096037766390172c34fb86ef048b8e8958b4e484b9e505d66bc",
+            "sha256:e6f5eb2f53fac7d408a45fbcdeda7224b1cfff64919d0f95473420a931347ae9",
+            "sha256:ec2eba188c1906b05b9b49ae55aae4efd8150c61ba450e6721f64620c50b59eb",
+            "sha256:ee040a7de8d295dbd261ef2d6d3192f13e2b08ec4a954de34a6fb8ff6422e24c",
+            "sha256:eedd3b59190885d1ebdf6c5e0ca56828beb1949b4dfe6e5d0256a461429ac386",
+            "sha256:f441422bb313ab25de7b3dbfd388e790eceb76ce01a18199ec4944b369017009",
+            "sha256:f8eb7b6716f5b50e9c06207a14172cf2de201e41912ebe732846c02c830455b9",
+            "sha256:fc4453705b81d03568d5b808ad8f09c77c47534f6ac2e72e733f9ca4714aa75c"
         ],
-        "markers": "python_version >= '3.6'",
-        "version": "==1.3.1"
+        "markers": "python_version >= '3.7'",
+        "version": "==1.3.2"
     },
+    "lxml": {
+        "hashes": [
+            "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d",
+            "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3",
+            "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2",
+            "sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae",
+            "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f",
+            "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927",
+            "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3",
+            "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7",
+            "sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59",
+            "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f",
+            "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade",
+            "sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96",
+            "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468",
+            "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b",
+            "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4",
+            "sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354",
+            "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83",
+            "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04",
+            "sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16",
+            "sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4",
+            "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791",
+            "sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a",
+            "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51",
+            "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1",
+            "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a",
+            "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f",
+            "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee",
+            "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec",
+            "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969",
+            "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28",
+            "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a",
+            "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa",
+            "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106",
+            "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d",
+            "sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d",
+            "sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617",
+            "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4",
+            "sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92",
+            "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0",
+            "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4",
+            "sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24",
+            "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2",
+            "sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e",
+            "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0",
+            "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654",
+            "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2",
+            "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23",
+            "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586"
+        ],
+        "index": "pypi",
+        "version": "==4.6.3"
+    },
     "markupsafe": {
         "hashes": [
@@ -387,56 +459,70 @@
     },
     "pebble": {
         "hashes": [
-            "sha256:556de0f4c65f943b73ba85ab4621f18000864d42a9d562c470ce7bf396d96424",
-            "sha256:b0abdc8830c21307038d63454584f71c2943e542e4e9d4c86d67aebc06c3519b"
+            "sha256:46e02767b239a29b8150466514fabb5c6632bea8c9b7456dfdb715f4636fc8a3",
+            "sha256:694e1105db888f3576b8f00662f90b057cf3780e6f8b7f57955a568008d0f497"
         ],
         "index": "pypi",
-        "version": "==4.6.1"
+        "version": "==4.6.3"
     },
     "pillow": {
         "hashes": [
-            "sha256:0b2efa07f69dc395d95bb9ef3299f4ca29bcb2157dc615bae0b42c3c20668ffc",
-            "sha256:114f816e4f73f9ec06997b2fde81a92cbf0777c9e8f462005550eed6bae57e63",
-            "sha256:147bd9e71fb9dcf08357b4d530b5167941e222a6fd21f869c7911bac40b9994d",
-            "sha256:15a2808e269a1cf2131930183dcc0419bc77bb73eb54285dde2706ac9939fa8e",
-            "sha256:196560dba4da7a72c5e7085fccc5938ab4075fd37fe8b5468869724109812edd",
-            "sha256:1c03e24be975e2afe70dfc5da6f187eea0b49a68bb2b69db0f30a61b7031cee4",
-            "sha256:1fd5066cd343b5db88c048d971994e56b296868766e461b82fa4e22498f34d77",
-            "sha256:29c9569049d04aaacd690573a0398dbd8e0bf0255684fee512b413c2142ab723",
-            "sha256:2b6dfa068a8b6137da34a4936f5a816aba0ecc967af2feeb32c4393ddd671cba",
-            "sha256:2cac53839bfc5cece8fdbe7f084d5e3ee61e1303cccc86511d351adcb9e2c792",
-            "sha256:2ee77c14a0299d0541d26f3d8500bb57e081233e3fa915fa35abd02c51fa7fae",
-            "sha256:37730f6e68bdc6a3f02d2079c34c532330d206429f3cee651aab6b66839a9f0e",
-            "sha256:3f08bd8d785204149b5b33e3b5f0ebbfe2190ea58d1a051c578e29e39bfd2367",
-            "sha256:479ab11cbd69612acefa8286481f65c5dece2002ffaa4f9db62682379ca3bb77",
-            "sha256:4bc3c7ef940eeb200ca65bd83005eb3aae8083d47e8fcbf5f0943baa50726856",
-            "sha256:660a87085925c61a0dcc80efb967512ac34dbb256ff7dd2b9b4ee8dbdab58cf4",
-            "sha256:67b3666b544b953a2777cb3f5a922e991be73ab32635666ee72e05876b8a92de",
-            "sha256:70af7d222df0ff81a2da601fab42decb009dc721545ed78549cb96e3a1c5f0c8",
-            "sha256:75e09042a3b39e0ea61ce37e941221313d51a9c26b8e54e12b3ececccb71718a",
-            "sha256:8960a8a9f4598974e4c2aeb1bff9bdd5db03ee65fd1fce8adf3223721aa2a636",
-            "sha256:9364c81b252d8348e9cc0cb63e856b8f7c1b340caba6ee7a7a65c968312f7dab",
-            "sha256:969cc558cca859cadf24f890fc009e1bce7d7d0386ba7c0478641a60199adf79",
-            "sha256:9a211b663cf2314edbdb4cf897beeb5c9ee3810d1d53f0e423f06d6ebbf9cd5d",
-            "sha256:a17ca41f45cf78c2216ebfab03add7cc350c305c38ff34ef4eef66b7d76c5229",
-            "sha256:a2f381932dca2cf775811a008aa3027671ace723b7a38838045b1aee8669fdcf",
-            "sha256:a4eef1ff2d62676deabf076f963eda4da34b51bc0517c70239fafed1d5b51500",
-            "sha256:c088a000dfdd88c184cc7271bfac8c5b82d9efa8637cd2b68183771e3cf56f04",
-            "sha256:c0e0550a404c69aab1e04ae89cca3e2a042b56ab043f7f729d984bf73ed2a093",
-            "sha256:c11003197f908878164f0e6da15fce22373ac3fc320cda8c9d16e6bba105b844",
-            "sha256:c2a5ff58751670292b406b9f06e07ed1446a4b13ffced6b6cab75b857485cbc8",
-            "sha256:c35d09db702f4185ba22bb33ef1751ad49c266534339a5cebeb5159d364f6f82",
-            "sha256:c379425c2707078dfb6bfad2430728831d399dc95a7deeb92015eb4c92345eaf",
-            "sha256:cc866706d56bd3a7dbf8bac8660c6f6462f2f2b8a49add2ba617bc0c54473d83",
-            "sha256:d0da39795049a9afcaadec532e7b669b5ebbb2a9134576ebcc15dd5bdae33cc0",
-            "sha256:f156d6ecfc747ee111c167f8faf5f4953761b5e66e91a4e6767e548d0f80129c",
-            "sha256:f4ebde71785f8bceb39dcd1e7f06bcc5d5c3cf48b9f69ab52636309387b097c8",
-            "sha256:fc214a6b75d2e0ea7745488da7da3c381f41790812988c7a92345978414fad37",
-            "sha256:fd7eef578f5b2200d066db1b50c4aa66410786201669fb76d5238b007918fb24",
-            "sha256:ff04c373477723430dce2e9d024c708a047d44cf17166bf16e604b379bf0ca14"
+            "sha256:0412516dcc9de9b0a1e0ae25a280015809de8270f134cc2c1e32c4eeb397cf30",
+            "sha256:04835e68ef12904bc3e1fd002b33eea0779320d4346082bd5b24bec12ad9c3e9",
+            "sha256:06d1adaa284696785375fa80a6a8eb309be722cf4ef8949518beb34487a3df71",
+            "sha256:085a90a99404b859a4b6c3daa42afde17cb3ad3115e44a75f0d7b4a32f06a6c9",
+            "sha256:0b9911ec70731711c3b6ebcde26caea620cbdd9dcb73c67b0730c8817f24711b",
+            "sha256:10e00f7336780ca7d3653cf3ac26f068fa11b5a96894ea29a64d3dc4b810d630",
+            "sha256:11c27e74bab423eb3c9232d97553111cc0be81b74b47165f07ebfdd29d825875",
+            "sha256:11eb7f98165d56042545c9e6db3ce394ed8b45089a67124298f0473b29cb60b2",
+            "sha256:13654b521fb98abdecec105ea3fb5ba863d1548c9b58831dd5105bb3873569f1",
+            "sha256:15ccb81a6ffc57ea0137f9f3ac2737ffa1d11f786244d719639df17476d399a7",
+            "sha256:18a07a683805d32826c09acfce44a90bf474e6a66ce482b1c7fcd3757d588df3",
+            "sha256:19ec4cfe4b961edc249b0e04b5618666c23a83bc35842dea2bfd5dfa0157f81b",
+            "sha256:1c3ff00110835bdda2b1e2b07f4a2548a39744bb7de5946dc8e95517c4fb2ca6",
+            "sha256:27a330bf7014ee034046db43ccbb05c766aa9e70b8d6c5260bfc38d73103b0ba",
+            "sha256:2b11c9d310a3522b0fd3c35667914271f570576a0e387701f370eb39d45f08a4",
+            "sha256:2c661542c6f71dfd9dc82d9d29a8386287e82813b0375b3a02983feac69ef864",
+            "sha256:2cde7a4d3687f21cffdf5bb171172070bb95e02af448c4c8b2f223d783214056",
+            "sha256:2d5e9dc0bf1b5d9048a94c48d0813b6c96fccfa4ccf276d9c36308840f40c228",
+            "sha256:2f23b2d3079522fdf3c09de6517f625f7a964f916c956527bed805ac043799b8",
+            "sha256:35d27687f027ad25a8d0ef45dd5208ef044c588003cdcedf05afb00dbc5c2deb",
+            "sha256:35d409030bf3bd05fa66fb5fdedc39c521b397f61ad04309c90444e893d05f7d",
+            "sha256:4326ea1e2722f3dc00ed77c36d3b5354b8fb7399fb59230249ea6d59cbed90da",
+            "sha256:4abc247b31a98f29e5224f2d31ef15f86a71f79c7f4d2ac345a5d551d6393073",
+            "sha256:4d89a2e9219a526401015153c0e9dd48319ea6ab9fe3b066a20aa9aee23d9fd3",
+            "sha256:4e59e99fd680e2b8b11bbd463f3c9450ab799305d5f2bafb74fefba6ac058616",
+            "sha256:548794f99ff52a73a156771a0402f5e1c35285bd981046a502d7e4793e8facaa",
+            "sha256:56fd98c8294f57636084f4b076b75f86c57b2a63a8410c0cd172bc93695ee979",
+            "sha256:59697568a0455764a094585b2551fd76bfd6b959c9f92d4bdec9d0e14616303a",
+            "sha256:6bff50ba9891be0a004ef48828e012babaaf7da204d81ab9be37480b9020a82b",
+            "sha256:6cb3dd7f23b044b0737317f892d399f9e2f0b3a02b22b2c692851fb8120d82c6",
+            "sha256:7dbfbc0020aa1d9bc1b0b8bcf255a7d73f4ad0336f8fd2533fcc54a4ccfb9441",
+            "sha256:838eb85de6d9307c19c655c726f8d13b8b646f144ca6b3771fa62b711ebf7624",
+            "sha256:8b68f565a4175e12e68ca900af8910e8fe48aaa48fd3ca853494f384e11c8bcd",
+            "sha256:8f284dc1695caf71a74f24993b7c7473d77bc760be45f776a2c2f4e04c170550",
+            "sha256:963ebdc5365d748185fdb06daf2ac758116deecb2277ec5ae98139f93844bc09",
+            "sha256:a048dad5ed6ad1fad338c02c609b862dfaa921fcd065d747194a6805f91f2196",
+            "sha256:a1bd983c565f92779be456ece2479840ec39d386007cd4ae83382646293d681b",
+            "sha256:a66566f8a22561fc1a88dc87606c69b84fa9ce724f99522cf922c801ec68f5c1",
+            "sha256:bcb04ff12e79b28be6c9988f275e7ab69f01cc2ba319fb3114f87817bb7c74b6",
+            "sha256:bd24054aaf21e70a51e2a2a5ed1183560d3a69e6f9594a4bfe360a46f94eba83",
+            "sha256:be25cb93442c6d2f8702c599b51184bd3ccd83adebd08886b682173e09ef0c3f",
+            "sha256:c691b26283c3a31594683217d746f1dad59a7ae1d4cfc24626d7a064a11197d4",
+            "sha256:cc9d0dec711c914ed500f1d0d3822868760954dce98dfb0b7382a854aee55d19",
+            "sha256:ce2e5e04bb86da6187f96d7bab3f93a7877830981b37f0287dd6479e27a10341",
+            "sha256:ce651ca46d0202c302a535d3047c55a0131a720cf554a578fc1b8a2aff0e7d96",
+            "sha256:d0c8ebbfd439c37624db98f3877d9ed12c137cadd99dde2d2eae0dab0bbfc355",
+            "sha256:d675a876b295afa114ca8bf42d7f86b5fb1298e1b6bb9a24405a3f6c8338811c",
+            "sha256:dde3f3ed8d00c72631bc19cbfff8ad3b6215062a5eed402381ad365f82f0c18c",
+            "sha256:e5a31c07cea5edbaeb4bdba6f2b87db7d3dc0f446f379d907e51cc70ea375629",
+            "sha256:f514c2717012859ccb349c97862568fdc0479aad85b0270d6b5a6509dbc142e2",
+            "sha256:fc0db32f7223b094964e71729c0361f93db43664dd1ec86d3df217853cedda87",
+            "sha256:fd4fd83aa912d7b89b4b4a1580d30e2a4242f3936882a3f433586e5ab97ed0d5",
+            "sha256:feb5db446e96bfecfec078b943cc07744cc759893cef045aa8b8b6d6aaa8274e"
         ],
         "markers": "python_version >= '3.6'",
-        "version": "==8.3.1"
+        "version": "==8.3.2"
     },
     "pycparser": {
         "hashes": [
@@ -545,11 +631,11 @@
     },
     "typing-extensions": {
        "hashes": [
-            "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497",
-            "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342",
-            "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"
+            "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
+            "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7",
+            "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"
         ],
-        "version": "==3.10.0.0"
+        "version": "==3.10.0.2"
     },
     "typing-utils": {
         "hashes": [
@@ -561,19 +647,19 @@
     },
     "urllib3": {
         "hashes": [
-            "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
-            "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
+            "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
+            "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
         ],
         "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
-        "version": "==1.26.6"
+        "version": "==1.26.7"
     },
     "werkzeug": {
         "hashes": [
-            "sha256:1de1db30d010ff1af14a009224ec49ab2329ad2cde454c8a708130642d579c42",
-            "sha256:6c1ec500dcdba0baa27600f6a22f6333d8b662d22027ff9f6202e3367413caa8"
+            "sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f",
+            "sha256:aa2bb6fc8dee8d6c504c0ac1e7f5f7dc5810a9903e793b6f715a9f015bdadb9a"
         ],
         "markers": "python_version >= '3.6'",
-        "version": "==2.0.1"
+        "version": "==2.0.2"
     }
     },
     "develop": {}
@@ -16,6 +16,7 @@ from werkzeug import exceptions

 from .. import auth, log, models
 from ..data import service
+from ..documents import sanitize_document

 bp = Blueprint("collections", __name__, url_prefix = "/collections")
 LOGGER = logging.getLogger(__name__)

@@ -411,7 +412,9 @@ def get_overlap_ids(collection_id: str):

 def _upload_documents(collection, docs):
-    doc_resp = service.post("/documents", json=docs)
+    for doc in docs:
+        sanitize_document(doc)
+    doc_resp = service.post("documents", json=docs)
     # TODO if it failed, roll back the created collection and classifier
     if not doc_resp.ok:
         abort(doc_resp.status_code, doc_resp.content)
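A note on the "/documents" → "documents" change in _upload_documents above: if the service helper resolves paths against a base URL with urljoin-style semantics, a leading slash silently discards the base path. This is a hypothetical illustration of that pitfall; the base URL below is invented for the example and is not taken from the PINE codebase:

    from urllib.parse import urljoin

    base = "http://eve:5001/api/"        # hypothetical base URL for the data service
    print(urljoin(base, "/documents"))   # http://eve:5001/documents  (leading slash drops /api/)
    print(urljoin(base, "documents"))    # http://eve:5001/api/documents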
@@ -1,3 +1,3 @@
 # (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.

-from .bp import get_collection_ids_for, get_user_permissions, get_user_permissions_by_id, get_user_permissions_by_ids
+from .bp import get_collection_ids_for, get_user_permissions, get_user_permissions_by_id, get_user_permissions_by_ids, sanitize_document
@@ -6,6 +6,7 @@ import re
 import typing

 from flask import abort, Blueprint, jsonify, request
+import lxml.html.clean
 from werkzeug import exceptions

 from .. import auth, collections, log, models

@@ -13,6 +14,19 @@ from ..data import service

 bp = Blueprint("documents", __name__, url_prefix = "/documents")

+HTML_CLEANER = lxml.html.clean.Cleaner(
+    page_structure=True,   # keep body only
+    links=True,            # remove <link> (not <a>)
+    safe_attrs_only=True,  # strip out non-standard attributes
+    style=False,           # leave <style>
+    javascript=True,       # no javascript!
+    scripts=True,          # no javascript!!
+    meta=True,             # strip out <meta>
+    forms=True,            # strip out forms
+    embedded=True,         # strip out embedded flash, etc.
+    kill_tags=["title"]    # otherwise the title gets embedded at the top
+)
+
 def _document_user_can_projection():
     return service.params({"projection": {
         "collection_id": 1

@@ -40,6 +54,9 @@ def get_user_permissions_by_id(document_id: str) -> models.CollectionUserPermissions:
 def get_user_permissions_by_ids(document_ids: typing.Iterable[str]) -> typing.List[models.CollectionUserPermissions]:
     return collections.get_user_permissions_by_ids(get_collection_ids_for(document_ids))

+def sanitize_document(document: dict):
+    if document and "metadata" in document and "html_view" in document["metadata"]:
+        document["metadata"]["html_view"] = HTML_CLEANER.clean_html(document["metadata"]["html_view"])
+
 @bp.route("/by_id/<doc_id>", methods = ["GET"])
 @auth.login_required

@@ -250,6 +267,9 @@ def add_document():
     if "has_annotated" not in doc:
         doc["has_annotated"] = {user_id: False for user_id in collections_by_id[doc["collection_id"]]["annotators"]}

+    # sanitize
+    sanitize_document(doc)
+
     # Add document(s) to database
     doc_resp = service.post("documents", json=docs)
     if not doc_resp.ok:
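For context, here is a minimal, self-contained sketch of what the new sanitize_document path does, assuming only that lxml (pinned above at ~=4.6.3) is installed; the sample payload is invented for illustration:

    import lxml.html.clean

    # Same switches as the HTML_CLEANER added in documents/bp.py above.
    cleaner = lxml.html.clean.Cleaner(
        page_structure=True, links=True, safe_attrs_only=True, style=False,
        javascript=True, scripts=True, meta=True, forms=True, embedded=True,
        kill_tags=["title"],
    )

    # Hypothetical document payload carrying an embedded script and event handler.
    doc = {"metadata": {"html_view":
        '<html><head><title>t</title></head>'
        '<body onload="alert(1)"><script>alert(1)</script><p>Hello</p></body></html>'}}

    doc["metadata"]["html_view"] = cleaner.clean_html(doc["metadata"]["html_view"])
    print(doc["metadata"]["html_view"])  # <script>, onload, and <title> are stripped; <p>Hello</p> survives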
@@ -91,6 +91,15 @@ class BaseConfig(object):
             framework="spacy",
             types=["fit", "predict", "status"]
         )
-    )
+    ),
+    dict(
+        name="service_simpletransformers",
+        version="1.0",
+        channel="service_simpletransformers",
+        service=dict(
+            framework="simpletransformers",
+            types=["fit", "predict", "status"]
+        )
+    )
 ]
@@ -26,7 +26,7 @@
     "src/assets"
 ],
 "styles": [
-    "src/styles.css",
+    "src/styles.scss",
     "src/themes.scss"
 ],
 "scripts": [
frontend/annotation/package-lock.json (generated, 3 lines changed)
@@ -6936,7 +6936,8 @@
 "ini": {
     "version": "1.3.8",
     "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-    "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
+    "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
+    "dev": true
 },
 "inquirer": {
     "version": "8.1.2",
@@ -204,52 +204,6 @@ mat-expansion-panel-header {
     box-shadow: 2px 2px 2px grey;
 }

-.annotation, .select {
-    -moz-box-shadow: 2px 2px 2px grey;
-    -webkit-box-shadow: 2px 2px 2px grey;
-    box-shadow: 2px 2px 2px grey;
-
-    border-top: 1px solid black;
-    border-bottom: 1px solid black;
-}
-
-.select {
-    background: white !important;
-}
-
-.annotationLeft, .selectLeft {
-    padding-left: 10px;
-
-    border-left: 1px solid black;
-
-    -moz-border-top-left-radius: 20px;
-    border-top-left-radius: 20px;
-    -moz-border-bottom-left-radius: 20px;
-    border-bottom-left-radius: 20px;
-}
-
-.annotationRight, .selectRight {
-    padding-right: 10px;
-    margin-right: 2px;
-
-    border-right: 1px solid black;
-
-    border-top-right-radius: 20px;
-    border-bottom-right-radius: 20px;
-}
-
-.select {
-
-}
-
-.selectLeft {
-
-}
-
-.selectRight {
-
-}
-
 .doc-label-list {
     align-items: center;
 }
@@ -1,211 +1,217 @@
 <!-- (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.-->
 <div fxFlexFill class="page-container" fxLayout="column">
     <mat-toolbar>
         <button class="doc-back-button" mat-icon-button matTooltip="Go back to collection details"
             (click)="backToCollectionDetails()">
             <mat-icon>keyboard_arrow_left</mat-icon>
         </button>
         <span class="page-title">Document {{doc?._id}}</span>
         <span fxFlex="8px"></span>
         <button class="title-toolbar-button" mat-stroked-button (click)="scroll('detailsFlag')">Details</button>
         <span fxFlex="8px"></span>
         <button class="title-toolbar-button" mat-stroked-button (click)="scroll('docAnnotateFlag')">Labeling</button>
         <span fxFlex="8px"></span>
         <button class="title-toolbar-button" mat-stroked-button (click)="scroll('imageFlag')">Image</button>
         <span fxFlex="8px"></span>
         <button class="title-toolbar-button" mat-stroked-button (click)="scroll('documentFlag')">Document</button>
         <span fxFlex></span>
         <button class="annotate-button" mat-raised-button (click)="save(false)">
             <span class="material-icons">save</span>Save
         </button>
         <span fxFlex="10px"></span>
         <button mat-raised-button (click)="save(true)">
             <span class="material-icons">skip_next</span>
             Save and Go to Next Document
         </button>
     </mat-toolbar>

     <div class="page-content" id="page-content" #pageContent>

         <app-loading></app-loading>

         <mat-accordion *ngIf="!loading.loading && !loading.error" [multi]="true" displayMode="flat">
             <mat-expansion-panel class="mat-elevation-z0" id="detailsFlag" [expanded]="panelExpanded.detailsFlag"
                 (closed)="panelIsOpen('detailsFlag', false)" (opened)="panelIsOpen('detailsFlag', true)"
                 (afterExpand)="onAfterExpand('detailsFlag')">
                 <mat-expansion-panel-header>
                     <mat-panel-title>Document Details</mat-panel-title>
                 </mat-expansion-panel-header>

                 <app-document-details expanded="true" [document]="doc" [collection]="collection"
                     (imageUrlChanged)="imageChanged($event)">
                 </app-document-details>
             </mat-expansion-panel>

-            <mat-expansion-panel class="mat-elevation-z0" id="docAnnotateFlag" [expanded]="panelExpanded.docAnnotateFlag"
-                (closed)="panelIsOpen('docAnnotateFlag', false)" (opened)="panelIsOpen('docAnnotateFlag', true)"
-                (afterExpand)="onAfterExpand('docAnnotateFlag')">
+            <mat-expansion-panel class="mat-elevation-z0" id="docAnnotateFlag"
+                [expanded]="panelExpanded.docAnnotateFlag" (closed)="panelIsOpen('docAnnotateFlag', false)"
+                (opened)="panelIsOpen('docAnnotateFlag', true)" (afterExpand)="onAfterExpand('docAnnotateFlag')">
                 <mat-expansion-panel-header>
                     <mat-panel-title>Document Labeling</mat-panel-title>
                 </mat-expansion-panel-header>

                 <div class="doc-labeling-container">
                     <div fxLayout="row">
                         <mat-error *ngIf="!permissions.annotate" id="cantAnnotate">
                             <h3>Note: you do not have authority to change or add annotations for this document.</h3>
                         </mat-error>
                     </div>

                     <div class="doc-label-list" fxLayout="row">
                         <mat-chip-list fxFlex>
                             <mat-checkbox *ngFor="let annotation of myDocAnnotations;" [(ngModel)]="annotation.checked"
                                 style="padding-right: 30px">
                                 <mat-chip [style.background-color]="annotation.label.color"
                                     class="shadowed cursor-pointer">
                                     {{annotation.label.name}}</mat-chip>
                             </mat-checkbox>
                         </mat-chip-list>
                     </div>
                 </div>
             </mat-expansion-panel>

-            <mat-expansion-panel class="mat-elevation-z0" id="imageFlag" class="no-padding" [expanded]="panelExpanded.imageFlag"
-                (closed)="panelIsOpen('imageFlag', false)" (opened)="panelIsOpen('imageFlag', true)"
-                (afterExpand)="onAfterExpand('imageFlag')">
+            <mat-expansion-panel class="mat-elevation-z0" id="imageFlag" class="no-padding"
+                [expanded]="panelExpanded.imageFlag" (closed)="panelIsOpen('imageFlag', false)"
+                (opened)="panelIsOpen('imageFlag', true)" (afterExpand)="onAfterExpand('imageFlag')">
                 <mat-expansion-panel-header>
                     <mat-panel-title>Image</mat-panel-title>
                 </mat-expansion-panel-header>

                 <div *ngIf="doc.metadata && doc.metadata['imageUrl']" id="myDocImage" class="image-container"
                     [ngStyle]="{'height': (pageHeight - 48 - 127) + 'px'}" #imageContainer>
                     <div
                         style="position: absolute; top: 0px; bottom: 20px; left: 20px; right: 20px; background-color: lightgray">
                         <button class="full-screen-btn" mat-raised-button (click)="toggleImageFullscreen()">{{
                             isImageFullscreen() ? 'Close' : 'Open' }} Full
                             Screen</button>
                         <app-image-explorer [imageUrl]="doc.metadata['imageUrl']" [documentId]="doc._id"
                             [collectionId]="collection._id"></app-image-explorer>
                     </div>
                 </div>
             </mat-expansion-panel>

-            <mat-expansion-panel class="mat-elevation-z0" id="documentFlag" class="no-padding" [expanded]="panelExpanded.documentFlag"
-                (closed)="panelIsOpen('documentFlag', false)" (opened)="panelIsOpen('documentFlag', true)"
-                (afterExpand)="onAfterExpand('documentFlag')">
+            <mat-expansion-panel class="mat-elevation-z0" id="documentFlag" class="no-padding"
+                [expanded]="panelExpanded.documentFlag" (closed)="panelIsOpen('documentFlag', false)"
+                (opened)="panelIsOpen('documentFlag', true)" (afterExpand)="onAfterExpand('documentFlag')">
                 <mat-expansion-panel-header>
                     <mat-panel-title>Document</mat-panel-title>
                 </mat-expansion-panel-header>

                 <div class="doc-content-container" [ngStyle]="{'height': (pageHeight - 48 - 127) + 'px'}">
                     <div class="filter-bar">
                         <button mat-icon-button (click)="showList = !showList">
                             <mat-icon>list</mat-icon>
                         </button>

                         <span fxFlex="22px"></span>

                         <span *ngIf="others.length === 0">No annotations from other users.</span>
                         <div *ngIf="others.length > 0" id="others">
                             <mat-form-field fxFlex="180px" floatLabel="never">
                                 <mat-label>Show Annotations:</mat-label>
                                 <mat-select id="othersAnnotations" value="" #othersSelect>
                                     <mat-option value="" (click)="showAnnotationsOf(othersSelect, null)">
                                         Mine
                                     </mat-option>
                                     <mat-option *ngFor="let other of others" [value]="other"
                                         (click)="showAnnotationsOf(othersSelect, other)">
                                         {{ auth.getUserDisplayName(other) }}</mat-option>
                                 </mat-select>
                             </mat-form-field>
                             <span fxFlex="10px"></span>
                             <mat-chip-list
                                 *ngIf="othersSelect.value && othersDocAnnotations.hasOwnProperty(othersSelect.value) && othersDocAnnotations[othersSelect.value].length > 0">
                                 <mat-chip *ngFor="let label of othersDocAnnotations[othersSelect.value]"
                                     [style.background-color]="getColorFor(label)">
                                     {{label}}
                                 </mat-chip>
                             </mat-chip-list>
                             <span
                                 *ngIf="othersSelect.value && (!othersDocAnnotations.hasOwnProperty(othersSelect.value) || othersDocAnnotations[othersSelect.value].length === 0)">No
                                 labels for this document.</span>
                         </div>
                         <span fxFlex></span>
                         <div>
                             <span>
                                 <b>
                                     Document Overall Agreement:
                                 </b>
                                 <span *ngIf="ann_agreement != null && ann_agreement != 'null'">{{ann_agreement |
                                     percent:'1.2-2'}}</span>
                                 <span *ngIf="ann_agreement == null || ann_agreement == 'null'">N/A</span>
                             </span>
                         </div>
                     </div>

                     <div class="annotate-area">
                         <div *ngIf="showList" class="annotate-table-container" fxFlex="30%">
                             <app-ner-annotation-table [labels]="availableLabels" [data]="nerData"
                                 (remove)="removeAnnotation($event)" [readOnly]="showingAnnotationsFor !== null">
                             </app-ner-annotation-table>
                         </div>
                         <div class="annotate-doc-container" fxFlex>
                             <div class="annotate-doc-toolbar" fxLayout="row">
                                 <span class="mat-title">NER Annotations</span>
                                 <span fxFlex></span>
                                 <span *ngIf="showingAnnotationsFor === null">Click to select text; right-click to
                                     annotate
                                     selection</span>
                                 <span *ngIf="showingAnnotationsFor !== null">Showing
                                     {{ auth.getUserDisplayName(showingAnnotationsFor) }}'s
                                     annotations in read-only mode</span>
                                 <span fxFlex="10px"></span>
                                 <mat-menu #settingsMenu="matMenu" id="settings">
                                     <button>
                                         <mat-checkbox matMenuItem [(ngModel)]="settingMonospace"
                                             (click)="$event.stopPropagation()" class="mat-menu-item">
                                             Monospace font
                                         </mat-checkbox>
                                     </button>
                                 </mat-menu>
                                 <button mat-icon-button [matMenuTriggerFor]="settingsMenu" id="settingsButton"
                                     matTooltip="Document/annotation settings">
                                     <mat-icon>settings</mat-icon>
                                 </button>
                             </div>

                             <div #docElem id="doc" class="cursor-pointer">
+                                <span id="words-html">
+                                    <div *ngIf="styleHtml" [innerHtml]="styleHtml"></div>
+                                    <div *ngIf="contentHtml" [innerHtml]="contentHtml"></div>
+                                </span>
                                 <!-- set word-start and word-end to help with testing -->
+                                <ng-container *ngIf="!contentHtml">
                                     <span #wordsList class="word" *ngFor="let word of nerData.words" [id]="word.id"
                                         [attr.word-start]="word.start" [attr.word-end]="word.end"
                                         [matTooltip]="getWordTooltip(word)" (mousedown)="mousedown($event, word)"
                                         (mouseover)="mouseover($event, word)" (mouseout)="mouseout($event, word)"
                                         (mouseup)="mouseup($event, word)" (click)="click($event, word)"
                                         (contextmenu)="contextMenu($event, word)">{{ word.text }}</span>
+                                </ng-container>
                             </div>

                             <div *ngIf="!allowOverlappingNerAnnotations"> (Note: overlapping annotations are not allowed
                                 for
                                 this
                                 collection.)
                             </div>

                             <div #popoverTemplate id="popoverTemplate" class="popover" hidden>
                                 <mat-chip-list>
                                     <mat-chip *ngFor="let label of availableLabels"
                                         [style.background-color]="label.color"
                                         class="shadowed cursor-pointer doc-label-chip">{{label.name}}</mat-chip>
                                 </mat-chip-list>
                                 <div style="padding: 2px">
                                     <button mat-raised-button color="warn">
                                         Remove / Reset
                                     </button>
                                 </div>
                             </div>
                         </div>
                     </div>
                 </div>

             </mat-expansion-panel>
         </mat-accordion>
     </div>
 </div>
(File diff suppressed because it is too large.)
@@ -11,6 +11,7 @@ export class NerData {
     public words: Word[];
     public annotations: NerAnnotation[];
     private wordIndices: object;
+    private wordMap: { [id: string]: Word } = {};

     constructor() {
         this.changed = new EventEmitter<NerAnnotation[]>();

@@ -22,7 +23,18 @@ export class NerData {
     public setWordsAndAnnotations(words: Word[], annotations: NerAnnotation[]) {
         this.words = words;
         this.setAnnotations(annotations);
+
+        console.log(this.words);
+
+        this.wordMap = {};
+        for(const word of words) {
+            this.wordMap[word.id] = word;
+        }
     }

+    public getWordById(id: string) {
+        return this.wordMap[id];
+    }
+
     public setAnnotations(annotations: NerAnnotation[]) {
         this.annotations = annotations.slice();
@@ -89,6 +89,9 @@ export class NerSelection {
         this.words[0].elem.classList.remove("selectLeft");
         for(let i = this.words[0].index - 1; i >= word.index; i--) {
             const docWord = nerData.words[i];
+            if(!docWord?.elem) {
+                continue;
+            }
             docWord.elem.classList.add("select");
             if(i === word.index) {
                 docWord.elem.classList.add("selectLeft");

@@ -101,6 +104,9 @@ export class NerSelection {
         this.words[this.words.length - 1].elem.classList.remove("selectRight");
         for(let i = this.words[this.words.length - 1].index + 1; i <= word.index; i++) {
             const docWord = nerData.words[i];
+            if(!docWord?.elem) {
+                continue;
+            }
             docWord.elem.classList.add("select");
             if(i === word.index) {
                 docWord.elem.classList.add("selectRight");
@@ -1,56 +1,62 @@
 <!-- (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.-->
 <div class="detail-container">
     <table class="metadata-table">
         <tr class="space-under">
             <td><b>Document ID:</b></td>
             <td>{{document?._id}}</td>
         </tr>
         <tr class="space-under">
             <td><b>Creation Date:</b></td>
             <td>{{document?._created}}</td>
         </tr>
         <tr class="space-under">
             <td><b>Last Updated:</b></td>
             <td>{{document?._updated}}</td>
         </tr>
         <tr class="space-under">
             <td><b>Creator:</b></td>
             <td>{{auth.getUserDisplayName(document?.creator_id)}}</td>
         </tr>
         <tr class="space-under">
             <td><b>Metadata:</b></td>
             <td>
                 <table>
                     <ng-container *ngIf="document && document?.metadata">
                         <tr *ngFor="let item of document?.metadata | keyvalue">
-                            <td><b>{{item.key}}</b></td>
-                            <td *ngIf="item.key !== 'imageUrl'">{{item.value}}</td>
-                            <td *ngIf="item.key === 'imageUrl'">
-                                <a [href]="collections.collectionImageUrl(collection._id, item.value)" target="_blank">
-                                    {{item.value}}
-                                    <span
-                                        *ngIf="item.value !== collections.collectionImageUrl(collection._id, item.value)">({{collections.collectionImageUrl(collection._id, item.value)}})</span>
-                                </a>
-                                <div><button *ngIf="permissions.modify_document_metadata" mat-button mat-raised-button
-                                    (click)="updateImage()">Update document image</button></div>
-                            </td>
+                            <ng-container *ngIf="item.key != 'html_view'">
+                                <td><b>{{item.key}}</b></td>
+                                <td *ngIf="item.key !== 'imageUrl'">{{item.value}}</td>
+                                <td *ngIf="item.key === 'imageUrl'">
+                                    <a [href]="collections.collectionImageUrl(collection._id, item.value)"
+                                        target="_blank">
+                                        {{item.value}}
+                                        <span
+                                            *ngIf="item.value !== collections.collectionImageUrl(collection._id, item.value)">({{collections.collectionImageUrl(collection._id,
+                                            item.value)}})</span>
+                                    </a>
+                                    <div><button *ngIf="permissions.modify_document_metadata" mat-button
+                                        mat-raised-button (click)="updateImage()">Update document image</button>
+                                    </div>
+                                </td>
+                            </ng-container>
                         </tr>
                     </ng-container>
                     <tr
                         *ngIf="permissions.modify_document_metadata && (!document || !document.metadata || !document.metadata.hasOwnProperty('imageUrl'))">
                         <td><b>imageUrl</b></td>
                         <td><button mat-button mat-raised-button (click)="updateImage()">Update document
                             image</button></td>
                     </tr>
                 </table>
             </td>
         </tr>
         <tr>
             <td><b>Collection:</b></td>
             <td *ngIf="!collection">Loading...</td>
-            <td *ngIf="collection">{{ collection.hasTitle() ? collection.getTitle() + " (" : "" }}<a href="#"
-                [routerLink]="['/' + PATHS.collection.details, document?.collection_id]">{{document?.collection_id}}</a>{{ collection.hasTitle() ? ")" : "" }}
+            <td *ngIf="collection">{{ collection.hasTitle() ? collection.getTitle() + " (" : "" }}<a href="#"
+                [routerLink]="['/' + PATHS.collection.details, document?.collection_id]">{{document?.collection_id}}</a>{{
+                collection.hasTitle() ? ")" : "" }}
             </td>
         </tr>
     </table>
 </div>
@@ -1,6 +1,6 @@
/*(C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC. */

import { Component, OnInit, ViewChild, Input, Output, EventEmitter } from '@angular/core';
import { Component, OnInit, ViewChild, Input, Output, EventEmitter, ChangeDetectorRef } from '@angular/core';
import { MatPaginator } from '@angular/material/paginator';
import { MatSort, MatSortable } from '@angular/material/sort';
import { MatTable, MatTableDataSource } from '@angular/material/table';

@@ -44,7 +44,9 @@ export class NERAnnotationTableComponent implements OnInit {

    public dataSource: MatTableDataSource<NerAnnotation>;

    constructor() {
    constructor(
        private cdr: ChangeDetectorRef
    ) {
        this.dataSource = new MatTableDataSource<NerAnnotation>();
        this.dataSource.filterPredicate = (annotation, value): boolean => {
            if(annotation.label.toLowerCase().includes(value)) {

@@ -60,6 +62,7 @@ export class NERAnnotationTableComponent implements OnInit {
    ngOnInit() {
        this.data.changed.subscribe((res: NerAnnotation[]) => {
            this.dataSource.data = res;
            this.cdr.detectChanges();
        });
        this.dataSource.sortingDataAccessor = (annotation: NerAnnotation, property: string) => {
            switch(property) {

@@ -1,5 +1,6 @@
// (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.

import * as _ from "lodash";
import { Observable } from "rxjs";

export class Word {

@@ -59,5 +60,19 @@ export class Word {
        return words;
    }

    public static parseWordObjectsFromHtml(elems: HTMLElement[]): Word[] {
        const words = [];
        _.forEach(elems, (elem: HTMLElement) => {
            console.log(elem);
            let id = elem.getAttribute('ID');
            let parts = id.split('_');
            let start = parts[1];
            //let end = parts[2];
            let wordObj = new Word(+start, elem.innerHTML, words.length);
            words.push(wordObj);
        });
        return words;
    }

}

@@ -58,3 +58,39 @@ td.space-left {
.spacer {
    flex: 1 1 auto;
}

.annotate-area {
    .annotation, .select {
        -moz-box-shadow: 2px 2px 2px grey;
        -webkit-box-shadow: 2px 2px 2px grey;
        box-shadow: 2px 2px 2px grey;

        border-top: 1px solid black;
        border-bottom: 1px solid black;
    }

    .select {
        background: rgba(255,255,255,0.4) !important;
    }

    .annotationLeft, .selectLeft {
        padding-left: 10px;

        border-left: 1px solid black;

        -moz-border-top-left-radius: 20px;
        border-top-left-radius: 20px;
        -moz-border-bottom-left-radius: 20px;
        border-bottom-left-radius: 20px;
    }

    .annotationRight, .selectRight {
        padding-right: 10px;
        margin-right: 2px;

        border-right: 1px solid black;

        border-top-right-radius: 20px;
        border-bottom-right-radius: 20px;
    }
}

@@ -76,8 +76,9 @@ WORKDIR ${ROOT_DIR}
# pipenv causing container to fail to rebuild if spacy installed previously
#Install python requirements
COPY Pipfile Pipfile.lock ./
RUN REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt pipenv install --dev --system --deploy

RUN REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt PIPENV_INSTALL_TIMEOUT=30 \
    pipenv install --dev --system --deploy
RUN python3 -m nltk.downloader punkt

#Copy contents of pipeline folder to docker
COPY pine ./pine

@@ -22,6 +22,10 @@ scikit-multilearn = "~=0.2.0"
python-json-logger = "~=2.0.2"
overrides = "~=6.1.0"
typing-extensions = "~=3.10.0.0"
pandas = "~=1.3.3"
simpletransformers = "~=0.61.13"
torch = {file = "https://download.pytorch.org/whl/cpu/torch-1.9.0%2Bcpu-cp38-cp38-linux_x86_64.whl"}
nltk = "~=3.6.7"

[requires]
python_version = "3.8"

2263 pipelines/Pipfile.lock generated
File diff suppressed because it is too large
@@ -9,8 +9,10 @@ fi

set -x

pipenv run python3 -m nltk.downloader punkt

PIDS=""
for SERVICE in opennlp corenlp spacy; do
for SERVICE in simpletransformers opennlp corenlp spacy; do
    AL_PIPELINE=${SERVICE} pipenv run python3 -m pine.pipelines.run_service &
    PIDS="${PIDS} $!"
done

@@ -10,6 +10,15 @@ from .shared.config import ConfigBuilder
logger = logging.getLogger(__name__)
config = ConfigBuilder.get_config()

class EveDocsAndAnnotations:

    def __init__(self):
        self.all_labels: typing.List[str] = []
        self.documents: typing.List[str] = []
        self.annotations: typing.List = []
        self.doc_ids: typing.List[str] = []
        self.ann_ids: typing.List[str] = []

class EveClient(object):
    eve_headers = {'Content-Type': 'application/json'}

@@ -105,7 +114,7 @@ class EveClient(object):
        }
        return self._get_documents_map(params)

    def get_docs_with_annotations(self, collection_id: str, doc_map: typing.Dict[str, str]) -> typing.Tuple[typing.List[str], typing.List[str], typing.List[str], typing.List[str]]:
    def get_docs_with_annotations(self, collection_id: str, doc_map: typing.Dict[str, str]) -> EveDocsAndAnnotations:
        """Gets document and annotation data. Only non-overlapping documents are returned.

        :param collection_id: str: the ID of the collection

@@ -116,10 +125,11 @@ class EveClient(object):
            ann_ids is a list of the annotation IDs
        :rtype: tuple
        """
        doc_ids = list()
        documents = []
        ann_ids = list()
        labels = []
        data = EveDocsAndAnnotations()

        # get all labels from collection object
        collection = self.get_obj("collections", collection_id)
        data.all_labels = collection["labels"]

        #get annotations and make data
        query = 'annotations?where={"collection_id":"%s"}' % (collection_id)

@@ -132,15 +142,15 @@ class EveClient(object):
            # remove overlaps
            if docid not in doc_map:
                continue
            doc_ids.append(docid)
            documents.append(doc_map[docid])
            ann_ids.append(a["_id"])
            labels.append(a["annotation"])
            data.doc_ids.append(docid)
            data.documents.append(doc_map[docid])
            data.ann_ids.append(a["_id"])
            data.annotations.append(a["annotation"])

            if query is None:
                break

        return documents, labels, doc_ids, ann_ids
        return data

    def update(self, resource, id, etag, update_obj):
        headers = {'Content-Type': 'application/json', 'If-Match': etag}

@@ -12,14 +12,31 @@ from skmultilearn.model_selection import IterativeStratification
from sklearn.preprocessing import MultiLabelBinarizer
from itertools import chain

from .EveClient import EveClient
from .EveClient import EveClient, EveDocsAndAnnotations
from . import RankingFunctions as rank
from .pipeline import EvaluationMetrics, StatMetrics
from .pmap_ner import NER
from .shared.config import ConfigBuilder

logger = logging.getLogger(__name__)
config = ConfigBuilder.get_config()

class FiveFoldResult(object):

    def __init__(self):
        self.metrics: typing.List[EvaluationMetrics] = []
        # store list of documents ids per fold
        self.folds: typing.List[typing.List] = []
        self.average_metrics: typing.Dict[str, StatMetrics] = {}

    def serialize_metrics(self):
        return [x.serialize() for x in self.metrics]

    def serialize_folds(self):
        return list(self.folds)  # make a copy

    def serialize_average_metrics(self):
        return {label: self.average_metrics[label].serialize() for label in self.average_metrics.keys()}

class ner_api(object):

@@ -43,16 +60,14 @@ class ner_api(object):
        status["has_trained"] = "filename" in classifier_obj
        return status

    def perform_fold(self, model: NER, train_data, test_data, **pipeline_parameters):
        model.fit(train_data[0], train_data[1], **pipeline_parameters)
        results = model.evaluate(test_data[0], test_data[1], range(0, len(test_data[0])))
    def perform_fold(self, model: NER, all_labels: typing.List[str], train_data, test_data, **pipeline_parameters) -> EvaluationMetrics:
        model.fit(train_data[0], train_data[1], all_labels, **pipeline_parameters)
        results = model.evaluate(test_data[0], test_data[1], all_labels)

        return results

    def perform_five_fold(self, model: NER, documents, annotations, doc_ids, **pipeline_parameters):
        metrics = list()
        # store list of documents ids per fold
        folds = list()
    def perform_five_fold(self, model: NER, all_labels: typing.List[str], documents, annotations, doc_ids: typing.List[str], **pipeline_parameters) -> FiveFoldResult:
        results = FiveFoldResult()
        # turning into numpy arrays to be able to access values with index array
        documents_np_array = np.array(documents)
        annotations_np_array = np.array(annotations, dtype=object)

@@ -84,51 +99,39 @@ class ner_api(object):
            train_documents = documents_np_array[train_index]
            test_documents = documents_np_array[test_index]

            fold_metrics = self.perform_fold(model, [train_documents.tolist(), train_annotations.tolist()],
                                             [test_documents.tolist(), test_annotations.tolist()], **pipeline_parameters)
            fold_metrics = self.perform_fold(model, all_labels,
                                             [train_documents.tolist(), train_annotations.tolist()],
                                             [test_documents.tolist(), test_annotations.tolist()],
                                             **pipeline_parameters)

            # saving docs used to train fold
            fold_doc_ids = doc_ids_np_array[train_index]
            folds.append(fold_doc_ids.tolist())
            results.folds.append(fold_doc_ids.tolist())

            # saving fold metrics
            metrics.append(fold_metrics)
            results.metrics.append(fold_metrics)

            for key in fold_metrics.keys():
            for key in fold_metrics.labels.keys():
                if key not in total_metrics:
                    total_metrics[key] = {"FN": 0, "FP": 0, "TP": 0, "TN": 0, "f1": 0, "precision": 0, "recall": 0, "acc": 0}
                total_metrics[key]["FN"] = total_metrics[key]["FN"] + fold_metrics[key]["FN"]
                total_metrics[key]["FP"] = total_metrics[key]["FP"] + fold_metrics[key]["FP"]
                total_metrics[key]["TP"] = total_metrics[key]["TP"] + fold_metrics[key]["TP"]
                total_metrics[key]["TN"] = total_metrics[key]["TN"] + fold_metrics[key]["TN"]
                    total_metrics[key] = StatMetrics()
                total_metrics[key].fn += fold_metrics.labels[key].fn
                total_metrics[key].fp += fold_metrics.labels[key].fp
                total_metrics[key].tp += fold_metrics.labels[key].tp
                total_metrics[key].tn += fold_metrics.labels[key].tn

        average_metrics = {}
        for label in total_metrics.keys():
            avg_metric = {}
            avg_metric["FN"] = total_metrics[label]["FN"] / 5
            avg_metric["FP"] = total_metrics[label]["FP"] / 5
            avg_metric["TP"] = total_metrics[label]["TP"] / 5
            avg_metric["TN"] = total_metrics[label]["TN"] / 5
            if (avg_metric["TP"] + avg_metric["FN"]) != 0:
                avg_metric["recall"] = avg_metric["TP"] / (avg_metric["TP"] + avg_metric["FN"])
            else:
                avg_metric["recall"] = 1.0
            if (avg_metric["TP"] + avg_metric["FP"]) != 0:
                avg_metric["precision"] = avg_metric["TP"] / (avg_metric["TP"] + avg_metric["FP"])
            else:
                avg_metric["precision"] = 0.0
            if (avg_metric["precision"] + avg_metric["recall"]) != 0:
                avg_metric["f1"] = 2 * (avg_metric["precision"] * avg_metric["recall"]) / (avg_metric["precision"] + avg_metric["recall"])
            else:
                avg_metric["f1"] = 0
            avg_metric["acc"] = (avg_metric["TP"] + avg_metric["TN"]) / (avg_metric["TP"] + avg_metric["TN"] + avg_metric["FP"] + avg_metric["FN"])
            avg_metric = StatMetrics()
            avg_metric.fn = total_metrics[label].fn / 5
            avg_metric.fp = total_metrics[label].fp / 5
            avg_metric.tp = total_metrics[label].tp / 5
            avg_metric.tn = total_metrics[label].tn / 5
            avg_metric.calc_precision_recall_f1_acc()

            average_metrics[label] = avg_metric
            results.average_metrics[label] = avg_metric

        return metrics, folds, average_metrics
        return results
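
# Editorial note: a minimal sketch (not part of this commit) of how the per-fold
# counts above roll up into an averaged StatMetrics, using the StatMetrics class
# this module already imports from .pipeline. The fold counts are hypothetical.
fold_counts = [(3, 1, 2, 10), (4, 0, 1, 11), (2, 2, 3, 9), (5, 1, 0, 12), (3, 1, 1, 10)]  # (tp, fp, fn, tn)
avg_metric = StatMetrics()
avg_metric.tp = sum(c[0] for c in fold_counts) / 5
avg_metric.fp = sum(c[1] for c in fold_counts) / 5
avg_metric.fn = sum(c[2] for c in fold_counts) / 5
avg_metric.tn = sum(c[3] for c in fold_counts) / 5
avg_metric.calc_precision_recall_f1_acc()  # derives precision/recall/F1/accuracy from the averaged counts
print(avg_metric.serialize())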

    def get_document_ranking(self, model: NER, doc_map: typing.Dict[str, str], doc_ids: typing.List[str]) -> typing.List[str]:
        """Calculates document rankings and returns document IDs sorted by ranking.

@@ -189,19 +192,21 @@ class ner_api(object):
        # get documents where overlap is 0
        doc_map = self.eve_client.get_documents(collection_id)
        # get documents with its annotations where overlap is 0
        documents, labels, doc_ids, ann_ids = self.eve_client.get_docs_with_annotations(collection_id, doc_map)
        eve_data = self.eve_client.get_docs_with_annotations(collection_id, doc_map)

        # instantiate model
        classifier = NER(pipeline_name)

        # get folds information
        metrics, folds, averages = self.perform_five_fold(classifier, documents, labels, doc_ids, **pipeline_parameters)
        fold_results = self.perform_five_fold(classifier, eve_data.all_labels,
                                              eve_data.documents, eve_data.annotations,
                                              eve_data.doc_ids, **pipeline_parameters)

        logger.info("Starting to train classifier for {} pipeline".format(pipeline_name))
        fit_results = classifier.fit(documents, labels, **pipeline_parameters)
        fit_results = classifier.fit(eve_data.documents, eve_data.annotations, eve_data.all_labels, **pipeline_parameters)
        results = {
            "fit": fit_results,
            "average_metrics": averages,
            "average_metrics": fold_results.serialize_average_metrics(),
            "updated_objects": {}
        }

@@ -221,11 +226,11 @@ class ner_api(object):
        # update classifier metrics on eve
        metrics_updated_obj = {
            'trained_classifier_db_version': classifier_obj['_version']+1,
            'documents': list(set(chain.from_iterable(folds))),
            'annotations': list(ann_ids),
            'folds': list(folds),
            'metrics': list(metrics),
            'metric_averages': dict(averages),
            'documents': list(set(chain.from_iterable(fold_results.folds))),
            'annotations': list(eve_data.ann_ids),
            'folds': fold_results.serialize_folds(),
            'metrics': fold_results.serialize_metrics(),
            'metric_averages': fold_results.serialize_average_metrics(),
            'filename': filename
        }
        if not self.eve_client.update('metrics', metrics_obj["_id"], metrics_obj['_etag'], metrics_updated_obj):

@@ -234,7 +239,7 @@ class ner_api(object):
        results["updated_objects"]["metrics"] = [metrics_obj["_id"]]

        # re rank documents
        ranks = self.get_document_ranking(classifier, doc_map, doc_ids)
        ranks = self.get_document_ranking(classifier, doc_map, eve_data.doc_ids)
        logger.info("Performing document rankings")

        # Save updates to eve

@@ -11,7 +11,7 @@ import uuid

from overrides import overrides

from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities
from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities, EvaluationMetrics, StatMetrics
from .shared.config import ConfigBuilder

config = ConfigBuilder.get_config()

@@ -161,7 +161,7 @@ class corenlp_NER(Pipeline):
    }

    @overrides
    def fit(self, X, y, **params) -> dict:
    def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
        default_params = self.__default_fit_params.copy()
        #format input data into tsv file for ner to train on
        try:

@@ -303,7 +303,7 @@ wordShape=""" + default_params["word_shape"] + """

    @overrides
    #TODO
    def next_example(self, X, Xid):
    def next_example(self, X: typing.Iterable[str], Xid):
        return

    ## EXTRA METHODS TO HELP WITH THE corenlp PIPELINE ##

@@ -313,7 +313,7 @@ wordShape=""" + default_params["word_shape"] + """
    #Takes input data and formats it to be easier to use in the corenlp pipeline
    #ASSUMES DATA FOLLOWS FORMAT X = [string], y = [[(start offset, stop offset, label), ()], ... []]
    #Currently cannot assign more than one label to the same word
    def format_data(self, X, y):
    def format_data(self, X: typing.Iterable[str], y):
        out = []
        for doc,ann in zip(X,y):
            #Extract labeled entities from doc

@@ -352,7 +352,7 @@ wordShape=""" + default_params["word_shape"] + """

    @overrides
    #models must be saved with extension ".ser.gz"
    def save_model(self, model_name):
    def save_model(self, model_name: str):
        if not model_name.endswith(".ser.gz"):
            logger.warn('WARNING: model_name must end in .ser.gz, adding...')
            model_name = model_name + ".ser.gz"

@@ -363,7 +363,7 @@ wordShape=""" + default_params["word_shape"] + """

    @overrides
    #properties can be exported/imported during train
    def load_model(self, model_name):
    def load_model(self, model_name: str):
        #TODO: what to do if model doesn't exist?
        if not model_name.endswith(".ser.gz"):
            logger.warn('WARNING: model_name must end in .ser.gz, adding...')

@@ -390,31 +390,31 @@ wordShape=""" + default_params["word_shape"] + """

    #Calculates Precision, Recall, and F1 Score for model based on input test data
    #WARNING: currently works for BioNLP data, no guarantees with other datasets
    def evaluate(self, X, y, Xid, verbose=False):

        known_labels = set()
        for anns in y:
            for ann in anns:
                known_labels.add(ann[2])

        stats = {}

    # WARNING: this is currently broken, but this whole pipeline is broken
    @overrides
    def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], verbose=False, **kwargs) -> EvaluationMetrics:
        try:
            train_data = self.format_data(X, y)
            if len(train_data) == 0 or train_data is None:
                raise Exception("ERROR: could not format input correctly")
        except:
            raise Exception("ERROR: could not format input correctly")

        known_labels = set()
        for anns in y:
            for ann in anns:
                known_labels.add(ann[2])

        metrics = EvaluationMetrics()
        test_text = ''

        for doc in X:
            test_text = test_text + doc + '\n\n'

        #rest of code tries to recreate calculations as this line, which can't be called more than once for some reason
        #results = self.__crf.classifyAndWriteAnswers(self.__java_String(self.__test_file), True)
        #print(test_text)
        results = self.__crf.classify(self.__java_String(test_text))

        #Calculate evaluation by iterating through answer key and matching tokens to classifier output
        s = 0
        w = 0

@@ -474,7 +474,7 @@ wordShape=""" + default_params["word_shape"] + """
            #(likely the current answer token doesn't exactly match the guess token, see `` vs '')
            if i+1 < len(doc):
                next_gold = doc[i+1]
            elif i >= len(doc) and d+1 < len(test_data):
            elif i >= len(doc) and d+1 < len(test_data):  # this is broken
                next_gold = test_data[d+1][0]
            else:
                next_gold = (None, None)

@@ -488,32 +488,30 @@ wordShape=""" + default_params["word_shape"] + """

            known_labels.add(pred)

            # Per token metriccs
            # Per token metrics
            for label in known_labels:
                if label not in stats:
                    stats[label] = [0, 0, 0, 0]

                if label not in metrics.labels:
                    metrics.labels[label] = StatMetrics()

            if gold == pred and gold != 'O':
                stats[gold][0] = stats[gold][0] + 1
                metrics.labels[gold].tp += 1
                for label in known_labels:
                    if label != gold:
                        stats[label][3] = stats[label][3] + 1
                        metrics.labels[label].tn += 1
            elif gold == 'O' and pred != 'O':
                stats[pred][1] = stats[pred][1] + 1
                metrics.labels[pred].fp += 1
                for label in known_labels:
                    if label != pred:
                        stats[label][3] = stats[label][3] + 1
                        metrics.labels[label].tn += 1
            elif pred == 'O' and gold != 'O':
                stats[gold][2] = stats[gold][2] + 1
                metrics.labels[gold].fn += 1
                for label in known_labels:
                    if label != gold:
                        stats[label][3] = stats[label][3] + 1
                        metrics.labels[label].tn += 1
            else:
                for label in known_labels:
                    stats[label][3] = stats[label][3] + 1
                    metrics.labels[label].tn += 1

            # Per annotation metrics

@@ -555,54 +553,22 @@ wordShape=""" + default_params["word_shape"] + """
        #ONLY USED FOR PER ANNOTATION METRICS
        # del stats['O']

        TP = 0
        TN = 0
        FP = 0
        FN = 0
        for key in stats:
            TP = TP + stats[key][0]
            FP = FP + stats[key][1]
            FN = FN + stats[key][2]
            TN = TN + stats[key][3]

        stats['Totals'] = [TP, FP, FN, TN]
        for key in metrics.labels:
            metrics.totals.tp += metrics.labels[key].tp
            metrics.totals.fp += metrics.labels[key].fp
            metrics.totals.fn += metrics.labels[key].fn
            metrics.totals.tn += metrics.labels[key].tn

        #print(test_data[-1])
        for key in stats:
            TP = stats[key][0]
            FP = stats[key][1]
            FN = stats[key][2]
            # Only generated when using per token metrics
            TN = stats[key][3]
            if (TP+FN) != 0:
                recall = TP/(TP+FN)
            else:
                recall = 1.0
            if (TP+FP) != 0:
                precision = TP/(TP+FP)
            else:
                precision = 0.0
            if (precision + recall) != 0:
                f1 = 2 * (precision * recall) / (precision + recall)
            else:
                f1 = 0
            # Acc Only works when using per token metrics which generates TN
            if (TP + FN + FP + TN) != 0:
                acc = (TP + TN) / (TP + FN + FP + TN)
            else:
                acc = 0
            #Used for annotation metrics
            # stats[key] = {'precision': precision, 'recall': recall, 'f1': f1, 'TP': TP, 'FP': FP, 'FN': FN}
            # Used for token metrics
            stats[key] = {'precision': precision, 'recall': recall, 'f1': f1, 'TP': TP, 'FP': FP, 'FN': FN, 'TN': TN, 'acc': acc}
        metrics.calc_precision_recall_f1_acc()

        return stats
        return metrics

    #Calculates Precision, Recall, and F1 Score for model based on input test data
    #TODO: prints a whole lot to the command line, find a way to suppress?
    def evaluate_orig(self, X, y, Xid):
    def evaluate_orig(self, X: typing.Iterable[str], y, Xid):
        try:
            test_data = self.format_data(X, y)
            if len(test_data) == 0 or test_data is None:

@@ -13,7 +13,7 @@ import typing
import pydash
from overrides import overrides

from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities
from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities, EvaluationMetrics, StatMetrics
from .shared.config import ConfigBuilder

config = ConfigBuilder.get_config()

@@ -148,14 +148,14 @@ class opennlp_NER(Pipeline):
    }

    @overrides
    def fit(self, X, y, **params) -> dict:
    def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
        try:
            data = self.format_data(X, y)
            if len(data)==0 or data is None:
                raise Exception("ERROR: could not format input correctly")
        except:
            raise Exception("ERROR: could not format input correctly")
        #print(data)
        logger.debug("Formated train data: %s", data)
        with open(self.__train_file, 'w') as f:
            f.write(data)
        inputStreamFactory = self.__java_MarkableFileInputStreamFactory(self.__java_File(self.__java_String(self.__train_file)))

@@ -249,14 +249,14 @@ class opennlp_NER(Pipeline):

    @overrides
    # TODO
    def next_example(self, X, Xid):
    def next_example(self, X: typing.Iterable[str], Xid):
        return

    # EXTRA METHODS TO HELP WITH THE opennlp PIPELINE ##

    @overrides
    # models must be saved and loaded with extension ".bin"
    def save_model(self, model_name):
    def save_model(self, model_name: str):
        if not model_name.endswith(".bin"):
            logger.warning('WARNING: model_name must end with .bin, adding...')
            model_name = model_name + ".bin"

@@ -266,7 +266,7 @@ class opennlp_NER(Pipeline):

    @overrides
    def load_model(self, model_name):
    def load_model(self, model_name: str):
        if not model_name.endswith(".bin"):
            logger.warning('WARNING: model_name must end with .bin, adding...')
            model_name = model_name + ".bin"

@@ -313,7 +313,7 @@ class opennlp_NER(Pipeline):
    #Takes input data and formats it to be easier to use in the opennlp pipeline
    #ASSUMES DATA FOLLOWS FORMAT X = [string], y = [[(start offset, stop offset, label), ()], ... []]
    #Currently cannot assign more than one label to the same word
    def format_data(self, X, y):
    def format_data(self, X: typing.Iterable[str], y):
        out = ''
        try:
            for doc, ann in zip(X, y):

@@ -373,13 +373,14 @@ class opennlp_NER(Pipeline):
            labels_per_token.append(labels)
        return labels_per_token

    def evaluate(self, X, y, Xid):
    @overrides
    def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **kwargs) -> EvaluationMetrics:
        predictions = self.predict(X)
        stats = {'Totals': [0, 0, 0, 0]}
        metrics = EvaluationMetrics()

        for (doc_id, prediction) in zip(Xid, predictions):
        for (index, prediction) in enumerate(predictions):
            guesses: typing.List[NerPrediction] = prediction.ner
            gold = y[Xid.index(doc_id)]
            gold = y[index]

            all_tokens = prediction.extra_data

@@ -414,47 +415,26 @@ class opennlp_NER(Pipeline):
                else:
                    TN.append(label)
            for label in all_known_labels:
                if label not in stats:
                    stats[label] = [0,0,0,0]
                if label not in metrics.labels:
                    metrics.labels[label] = StatMetrics()
            for label in TP:
                stats[label][0] += 1
                stats['Totals'][0] += 1
                metrics.labels[label].tp += 1
                metrics.totals.tp += 1
            for label in FP:
                stats[label][1] += 1
                stats['Totals'][1] += 1
                metrics.labels[label].fp += 1
                metrics.totals.fp += 1
            for label in FN:
                stats[label][2] += 1
                stats['Totals'][2] += 1
                metrics.labels[label].fn += 1
                metrics.totals.fn += 1
            for label in TN:
                stats[label][3] += 1
                stats['Totals'][3] += 1
                metrics.labels[label].tn += 1
                metrics.totals.tn += 1

        for key in stats:
            TP = stats[key][0]
            FP = stats[key][1]
            FN = stats[key][2]
            TN = stats[key][3]
            if (TP + FN) != 0:
                recall = TP / (TP + FN)
            else:
                recall = 1.0
            if (TP + FP) != 0:
                precision = TP / (TP + FP)
            else:
                precision = 0.0
            if (precision + recall) != 0:
                f1 = 2 * (precision * recall) / (precision + recall)
            else:
                f1 = 0
            if (TP + FN + FP + TN) != 0:
                acc = (TP + TN) / (TP + FN + FP + TN)
            else:
                acc = 0
            stats[key] = {'precision': precision, 'recall': recall, 'f1': f1, 'TP': TP, 'FP': FP, 'FN': FN, "TN" : TN, "acc": acc}
        metrics.calc_precision_recall_f1_acc()

        return stats
        return metrics

    def evaluate_orig(self, X, y, Xid):
    def evaluate_orig(self, X: typing.Iterable[str], y, Xid):
        try:
            data = self.format_data(X, y)
            if len(data) == 0 or data is None:

@@ -1,8 +1,66 @@
# (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.

import abc
import logging
import typing

logger = logging.getLogger(__name__)

class StatMetrics(object):

    def __init__(self, precision: float = None, recall: float = None, f1: float = None,
                 tp: int = 0, fp: int = 0, fn: int = 0, tn: int = 0, acc: float = None):
        self.precision = precision
        self.recall = recall
        self.f1 = f1
        self.tp = tp
        self.fp = fp
        self.fn = fn
        self.tn = tn
        self.acc = acc

    def calc_precision_recall_f1_acc(self):
        if (self.tp + self.fn) != 0:
            self.recall = self.tp / (self.tp + self.fn)
        else:
            self.recall = 1.0
        if (self.tp + self.fp) != 0:
            self.precision = self.tp / (self.tp + self.fp)
        else:
            self.precision = 0.0
        if (self.precision + self.recall) != 0:
            self.f1 = 2 * (self.precision * self.recall) / (self.precision + self.recall)
        else:
            self.f1 = 0.0
        if (self.tp + self.fn + self.fp + self.tn) != 0:
            self.acc = (self.tp + self.tn) / (self.tp + self.fn + self.fp + self.tn)
        else:
            self.acc = 0.0

    def serialize(self) -> dict:
        return {"precision": self.precision, "recall": self.recall, "f1": self.f1, "TP": self.tp,
                "FP": self.fp, "FN": self.fn, "TN": self.tn, "acc": self.acc}

class EvaluationMetrics(object):

    def __init__(self):
        self.labels: typing.Dict[str, StatMetrics] = {}
        self.totals = StatMetrics()

    def calc_precision_recall_f1_acc(self):
        for label in self.labels:
            self.labels[label].calc_precision_recall_f1_acc()
        self.totals.calc_precision_recall_f1_acc()

    def serialize(self) -> dict:
        d = {}
        for key in self.labels:
            d[key] = self.labels[key].serialize()
            if key == "Totals":
                logging.warn("There was a label called 'Totals' that is going to be overridden.")
        d["Totals"] = self.totals.serialize()
        return d
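
# Editorial usage sketch (not part of the commit) of the two classes above,
# with hand-picked counts; the serialized output mirrors the old stats dicts:
metrics = EvaluationMetrics()
metrics.labels["per"] = StatMetrics(tp=8, fp=2, fn=1, tn=89)
metrics.totals = StatMetrics(tp=8, fp=2, fn=1, tn=89)
metrics.calc_precision_recall_f1_acc()
metrics.serialize()
# -> {"per":    {"precision": 0.8, "recall": 0.888..., "f1": 0.842..., "TP": 8, ...},
#     "Totals": {"precision": 0.8, "recall": 0.888..., "f1": 0.842..., "TP": 8, ...}}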

class NerPrediction(object):
    def __init__(self, offset_start: int, offset_end: int, label: str):
        self.offset_start: int = offset_start

@@ -64,9 +122,15 @@ class Pipeline(object, metaclass=abc.ABCMeta):
    # fit(X, y)
    # internal state is changed
    @abc.abstractmethod
    def fit(self, X, y, **params) -> dict:
    def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
        raise NotImplementedError('Must define fit to use Pipeline Base Class')

    # evaluate(X, y, all_labels)
    # returns stats
    @abc.abstractmethod
    def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **kwargs) -> EvaluationMetrics:
        raise NotImplementedError('Must define evaluate to use Pipeline Base Class')

    # predict(X)
    # returns [[[offset_start, offset_end, label], ..., ...]
    @abc.abstractmethod

@@ -84,15 +148,15 @@ class Pipeline(object, metaclass=abc.ABCMeta):
    # Given model's current state evaluate the input (id, String) pairs and return a rank ordering of lowest->highest scores for instances (will need to discuss specifics of ranking)
    # Discussing rank is now a major project - see notes
    @abc.abstractmethod
    def next_example(self, X, Xid):
    def next_example(self, X: typing.Iterable[str], Xid):
        raise NotImplementedError('Must define next_example to use Pipeline Base Class')

    # saves model so that it can be loaded again later
    @abc.abstractmethod
    def save_model(self, model_name):
    def save_model(self, model_name: str):
        raise NotImplementedError('Must define save_model to use Pipeline Base Class')

    # loads a previously saved model
    @abc.abstractmethod
    def load_model(self, model_name):
    def load_model(self, model_name: str):
        raise NotImplementedError('Must define load_model to use Pipeline Base Class')

@@ -5,7 +5,7 @@ import logging
import os
import typing

from .pipeline import Pipeline, DocumentPredictions, DocumentPredictionProbabilities
from .pipeline import Pipeline, DocumentPredictions, DocumentPredictionProbabilities, EvaluationMetrics

from overrides import overrides

@@ -20,7 +20,7 @@ class NER(Pipeline):
    __lib = ''
    pipeline = -1

    __SUPPORTED_PIPELINES = ['spacy', 'corenlp', 'opennlp']
    __SUPPORTED_PIPELINES = ['spacy', 'corenlp', 'opennlp', 'simpletransformers']

    #initializes proper nlp library pipeline based on user selection
    #there are additional args to accomodate initializing different pipelines, check individual pipeline for specifics

@@ -56,8 +56,8 @@ class NER(Pipeline):
    #internal state is changed
    #kwargs varies between pipelines, see individual pipeline for extra arguments
    @overrides
    def fit(self, X, y, **kwargs) -> dict:
        return self.pipeline.fit(X, y, **kwargs)
    def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
        return self.pipeline.fit(X, y, all_labels, **params)

    @overrides
    def predict(self, X: typing.Iterable[str]) -> typing.List[DocumentPredictions]:

@@ -68,20 +68,19 @@ class NER(Pipeline):
    def predict_proba(self, X: typing.Iterable[str], **kwargs) -> typing.List[DocumentPredictionProbabilities]:
        return self.pipeline.predict_proba(X, **kwargs)

    # evaluate(X, y, Xid)
    # returns stats
    def evaluate(self, X, y, Xid, **kwargs):
        return self.pipeline.evaluate(X, y, Xid, **kwargs)
    @overrides
    def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **kwargs) -> EvaluationMetrics:
        return self.pipeline.evaluate(X, y, all_labels, **kwargs)

    #next_example(Xid)
    #Given model's current state evaluate the input (id, String) pairs and return a rank ordering of lowest->highest scores for instances (will need to discuss specifics of ranking)
    @overrides
    def next_example(self, X, Xid):
    def next_example(self, X: typing.Iterable[str], Xid):
        #may want to program it here instead of one level down, as the ranking function might not change with the pipeline used
        return self.pipeline.next_example(X, Xid)

    @overrides
    def save_model(self, model_name):
    def save_model(self, model_name: str):
        directory = os.path.dirname(model_name)
        # if directories in path dont exists create them
        if not os.path.exists(directory):

@@ -90,5 +89,5 @@ class NER(Pipeline):
        return self.pipeline.save_model(model_name)

    @overrides
    def load_model(self, model_name):
    def load_model(self, model_name: str):
        self.pipeline.load_model(model_name)

@@ -95,6 +95,15 @@ class BaseConfig(object):
                framework="spacy",
                types=["fit", "predict", "status"]
            )
        ),
        dict(
            name="simpletransformers",
            version="1.0",
            channel="service_simpletransformers",
            service=dict(
                framework="simpletransformers",
                types=["fit", "predict", "status"]
            )
        )
    ]

305 pipelines/pine/pipelines/simpletransformers_NER_pipeline.py Normal file
@@ -0,0 +1,305 @@
#!/usr/bin/env python3
# coding: utf8
# (C) 2019 The Johns Hopkins University Applied Physics Laboratory LLC.

import logging
import os
import os.path
from shutil import copyfile
import uuid

import typing

from overrides import overrides

from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities, EvaluationMetrics, StatMetrics
from .shared.config import ConfigBuilder

from nltk.tokenize import WhitespaceTokenizer
from nltk.tokenize.punkt import PunktSentenceTokenizer
import numpy as np
import pandas as pd
from simpletransformers.ner import NERModel, NERArgs

config = ConfigBuilder.get_config()
logger = logging.getLogger(__name__)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# TODO: Change the collections.json file to default the collection for simple transformers with real classifiers, etc

class simpletransformers_NER(Pipeline):

    def __init__(self, tmp_dir=None):
        self.__id = uuid.uuid4()
        if tmp_dir != None:
            self.__temp_dir = tmp_dir
            #can choose to dictate where the model will store files so that it doesn't overwrite any,
            #otherwise it will write to a new directory within the resources folder
        else:
            self.__temp_dir = config.ROOT_DIR + '/tmp/simpletransformers-' + str(self.__id)

        self.__model_dir = os.path.join(self.__temp_dir, "OUTPUT_MODEL/")
        self.__default_model_args = {
            # TODO: Some of these should be args passed in with defaults, probably epoch size, the dirs, any others Brant might say
            # TODO: There is a runs/ directory that is default created in the current directory where this is ran (pipelines/),
            # there might be an option to change that, or maybe just add to gitignore?
            "output_dir": self.__model_dir,
            "cache_dir": os.path.join(self.__temp_dir, "CACHE_DIR/"),
            "tensorboard_dir": os.path.join(self.__temp_dir, "TENSORBOARD/"),
            "max_seq_length": 128,
            "train_batch_size": 16,
            "gradient_accumulation_steps": 1,
            "eval_batch_size": 8,
            "num_train_epochs": 1,
            "weight_decay": 0,
            "learning_rate": 4e-5,
            "adam_epsilon": 1e-8,
            "warmup_ratio": 0.06,
            "warmup_steps": 20,
            "max_grad_norm": 1.0,

            "logging_steps": 50,
            "save_steps": 500,

            "overwrite_output_dir": True,
            "reprocess_input_data": False,
            "evaluate_during_training": False,
        }
        # TODO: Switch back to bioclinical bert, and also adding this as an option to change.
        # All models we can use: https://huggingface.co/models
        # self.__model_name = "emilyalsentzer/Bio_ClinicalBERT"
        # This currently being used because it is faster.
        self.__model_type = "bert"
        self.__model_name = "google/mobilebert-uncased"
        self.__model_use_cuda = False
        self.__model = None
        self.__sentence_tokenizer = PunktSentenceTokenizer()
        self.__word_tokenizer = WhitespaceTokenizer()

    # status()
    @overrides
    def status(self) -> dict:
        return {
            "default_model_args": self.__default_model_args
        }

    # fit(X, y)
    # internal state is changed
    @overrides
    def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
        # setting up params
        model_args = self.__default_model_args.copy()
        if params is not None:
            for key in model_args.keys():
                if key in params:
                    model_args[key] = params[key]
        logger.info("Training with parameters: {}".format(model_args))

        # First, need to set up the data into a pandas dataframe and format our labels
        df = self._format_data(X, y)
        labels = self._format_labels(all_labels)

        # Create a new model, needs to be here for now since this is where we get labels
        if not self.__model:
            self.__model = NERModel(self.__model_type, self.__model_name, labels=labels,
                                    use_cuda=self.__model_use_cuda, args=model_args)

        # After this, the model should be trained, and output files created
        self.__model.train_model(df, verbose=False, silent=True,
                                 show_running_loss=False)

        return {}

    @overrides
    def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **kwargs) -> EvaluationMetrics:
        if not self.__model:
            raise Exception("Can't evaluate until model has been trained or loaded")

        # First, need to set up the data into a pandas dataframe and format our labels
        df = self._format_data(X, y)

        # No need to recreate model, as this is only run after fit().

        # Evaluate.
        result, model_outputs, preds_list = self.__model.eval_model(
            df, verbose=False)
        # acc=sklearn.metrics.accuracy_score
        logger.info("Evaluated model, result={}".format(result))

        metrics = EvaluationMetrics()
        metrics.totals.precision = result["precision"]
        metrics.totals.recall = result["recall"]
        metrics.totals.f1 = result["f1_score"]

        # TODO: need acc
        # TODO: need metrics for each label

        return metrics

    # predict(X)
    @overrides
    def predict(self, X: typing.Iterable[str]) -> typing.List[DocumentPredictions]:
        # First, make sure this model has been trained
        if not self.__model:
            return None

        # Make predictions with the model
        return_preds = []
        for doc in X:
            data = [s for s in self._sentencize(doc)]
            predictions, _ = self.__model.predict([sentence for (_, _, sentence) in data])
            return_preds.append(self._format_prediction(data, predictions))

        return return_preds

    # predict_proba(X)
    # can also return scores for all labels if get_all is True
    @overrides
    def predict_proba(self, X: typing.Iterable[str], **kwargs) -> typing.List[DocumentPredictionProbabilities]:
        # TODO: Need to implement this.
        # The "raw_outputs" (second item in tuple returned from predict) is probably useful for this.
        # Can turn predictions into probabilities for each label by running:
        # Where the array passed in refers to each word (print raw_outputs in the expanded_ner.py file to see this)
        # a = np.asarray([-0.2597193, 0.3929489, 0.42044127, 0.65579444, -0.075302914, 0.0072728638, 0.11236907, -0.035289638, -0.09346388, -0.25901815, -0.16599336, -0.06283752, -0.2664347])
        # prob = softmax(a)
        # prob is then equal to: array([0.0552652 , 0.10614558, 0.10910426, 0.13805568, 0.06645731,
        #                               0.07217802, 0.0801766 , 0.0691704 , 0.06526127, 0.05530396,
        #                               0.06069549, 0.06729091, 0.05489531]) which look like the probabilities of the labels (there are the same number of elements as labels)
        # It probably refers to the order of the labels given in, so if the labels arg was ['B-geo', 'I-geo'...] then
        # B-geo is probably 0.0552652 and I-geo is probably 0.10614558... etc
        return []
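
# Editorial sketch of the softmax conversion described in the comments above
# (not part of the commit; the raw-output vector is the illustrative one from
# the comment, and np is already imported by this module):
a = np.asarray([-0.2597193, 0.3929489, 0.42044127, 0.65579444, -0.075302914,
                0.0072728638, 0.11236907, -0.035289638, -0.09346388, -0.25901815,
                -0.16599336, -0.06283752, -0.2664347])
e = np.exp(a - a.max())  # subtract the max for numerical stability
prob = e / e.sum()       # one probability per label, in the order the labels were passed to NERModel(labels=...)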

    # next_example(X, Xid)
    # Given model's current state evaluate the input (id, String) pairs and return a rank ordering of lowest->highest scores for instances (will need to discuss specifics of ranking)
    # Discussing rank is now a major project - see notes
    @overrides
    def next_example(self, X: typing.Iterable[str], Xid):
        # Don't think we needed to do anything with this.
        return None

    # saves model so that it can be loaded again later
    @overrides
    def save_model(self, model_name: str):
        # Save all files from the output dir to the desired spot in order to load
        os.mkdir(model_name)
        # Copy from the tmp directory - but not the checkpoints
        for filename in os.listdir(self.__model_dir):
            if "checkpoint" not in filename:
                copyfile(os.path.join(self.__model_dir, filename), os.path.join(model_name, filename))

        return model_name

    # loads a previously saved model
    @overrides
    def load_model(self, model_name: str):
        # Loading from model requires creating the model from the saved directory
        # This "model_name" is just the path, it doesn't refer to the name like before
        self.__model = NERModel(self.__model_type, model_name,
                                use_cuda=self.__model_use_cuda, args=self.__default_model_args)

    ###############################
    # Helper Methods
    ###############################

    def _get_word_label(self, start_index, end_index, label_list):
        # Takes in the indices of a word and label list to return a related tag (if possible)
        # This will account for the I-<label> or B-<label> that simpletransformers expects
        for label_group in label_list:
            # This works because the word either begins a multi-word label or the label only covers a single word
            if label_group[0] == start_index:
                return "B-" + label_group[2]
            # This is at least the second word in a multi-word label
            # <= because == works on the last word, > is for any word that appears BETWEEN the first and last words
            elif label_group[0] < start_index and label_group[1] >= end_index:
                return "I-" + label_group[2]
            # Assuming y is always sorted, this ends the loop if there is no label at this index early to save time
            elif end_index < label_group[0]:
                break

        # If it got here, the label was not found
        return "O"
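
# Editorial sketch of _get_word_label's behavior (not part of the commit; the
# module path, scratch dir, and offsets are assumptions for illustration):
from pine.pipelines.simpletransformers_NER_pipeline import simpletransformers_NER  # assumed module path

p = simpletransformers_NER(tmp_dir="/tmp/st-demo")   # hypothetical scratch dir
spans = [(0, 10, "per")]                             # one annotation covering "John Smith" in "John Smith ran."
assert p._get_word_label(0, 4, spans) == "B-per"     # "John" starts the span
assert p._get_word_label(5, 10, spans) == "I-per"    # "Smith" continues it
assert p._get_word_label(11, 15, spans) == "O"       # "ran." is outside every span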

    def _sentencize(self, text: str) -> typing.Generator[typing.Tuple[int, int, str], None, None]:
        for (sentence_start, sentence_end) in self.__sentence_tokenizer.span_tokenize(text):
            yield (sentence_start, sentence_end, text[sentence_start:sentence_end])

    # Takes input data and formats it to be easier to use in the simpletransformers pipeline
    # ASSUMES DATA FOLLOWS FORMAT X = [string], y = [[(start offset, stop offset, label), ()], ... []]
    # Simpletransformers needs a pandas dataframe with columns: sentence_id, words, labels
    def _format_data(self, X: typing.Iterable[str], y) -> pd.DataFrame:
        # TODO: Need to check to make sure no sentence has over max_seq_length words
        df = pd.DataFrame(columns=["sentence_id", "words", "labels"])
        curr_sentence_id = 0
        for (doc_txt, labels) in zip(X, y):
            for (sentence_start, _, sentence) in self._sentencize(doc_txt):
                for (sentence_word_start, sentence_word_end) in self.__word_tokenizer.span_tokenize(sentence):
                    word_start = sentence_start + sentence_word_start
                    word_end = sentence_start + sentence_word_end
                    word = doc_txt[word_start:word_end]
                    curr_label = self._get_word_label(word_start, word_end, labels)
                    df = df.append({
                        "sentence_id": curr_sentence_id,
                        "words": word,
                        "labels": curr_label
                    }, ignore_index=True)
                curr_sentence_id += 1

        return df
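
# Editorial sketch: for the same "John Smith ran." example with a single
# annotation (0, 10, "per"), the frame _format_data builds would look like
# this (hand-built values, not part of the commit):
import pandas as pd

df = pd.DataFrame([
    {"sentence_id": 0, "words": "John",  "labels": "B-per"},
    {"sentence_id": 0, "words": "Smith", "labels": "I-per"},
    {"sentence_id": 0, "words": "ran.",  "labels": "O"},
])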

    # Takes the prediction output of simpletransformers ([[{'U.N.': 'B-per'}], [{'relief': 'I-gpe'}], ...])
    # and turns it into the form PINE desires, [[[offset_start, offset_end, label], ..., ...]
    def _format_prediction(self, data, predictions) -> DocumentPredictions:
        ner: typing.List[NerPrediction] = []
        for (index, sentence_predictions) in enumerate(predictions):
            sentence_start, _, sentence = data[index]
            current_label = None
            current_label_start = None
            current_label_end = None
            word_index = 0
            sentence_ner: typing.List[NerPrediction] = []
            for pred_dict in sentence_predictions:
                for (word, label) in pred_dict.items():
                    word_index = sentence.find(word, word_index)
                    if label == "O":
                        if current_label != None:
                            sentence_ner.append(NerPrediction(current_label_start, current_label_end, current_label))
                            current_label = current_label_start = current_label_end = None
                        continue

                    is_b = label.startswith("B-")
                    is_i = label.startswith("I-")
                    if is_b or is_i:
                        label = label[2:]

                    # if we're at the beginning, we always add the old tag
                    # if we're at an inner and it's different from the current label, add the old tag
                    if current_label != None and (is_b or (is_i and label != current_label)):
                        sentence_ner.append(NerPrediction(current_label_start, current_label_end, current_label))
                        current_label = current_label_start = current_label_end = None

                    if current_label != None:  # continuing the label
                        current_label_end = sentence_start + word_index + len(word)
                    else:  # new label
                        current_label = label
                        current_label_start = sentence_start + word_index
                        current_label_end = sentence_start + word_index + len(word)

            # the last label
            if current_label != None:
                sentence_ner.append(NerPrediction(current_label_start, current_label_end, current_label))
            ner += sentence_ner

        return DocumentPredictions(ner, [])
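
# Editorial sketch of the inverse direction: an illustrative input/output pair
# for _format_prediction (not part of the commit; offsets hand-checked against
# the loop above, using the instance p from the earlier sketch):
data = [(0, 16, "U.N. aid arrived")]                            # one sentence at document offset 0
predictions = [[{"U.N.": "B-org"}, {"aid": "O"}, {"arrived": "O"}]]
doc = p._format_prediction(data, predictions)
# doc.ner holds a single NerPrediction: offset_start=0, offset_end=4, label="org"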

    # Get a list of all labels in a set of data
    def _format_labels(self, all_labels: typing.List[str]):
        # Have to add a B-<label> and I-<label> for each label.
        ret_labels = []
        for label in all_labels:
            ret_labels.append("B-" + str(label))
            ret_labels.append("I-" + str(label))

        # Add the other tag
        ret_labels.append("O")
        return ret_labels

@@ -19,7 +19,7 @@ from spacy.scorer import Scorer
|
||||
from spacy.gold import GoldParse
|
||||
from overrides import overrides
|
||||
|
||||
from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities
|
||||
from .pipeline import Pipeline, NerPrediction, DocumentPredictions, NerPredictionProbabilities, DocumentPredictionProbabilities, EvaluationMetrics, StatMetrics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -74,7 +74,7 @@ class spacy_NER(Pipeline):
|
||||
}
|
||||
|
||||
@overrides
|
||||
def fit(self, X, y, **params) -> dict:
|
||||
def fit(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **params) -> dict:
|
||||
#setting up params
|
||||
default_params = self.__default_fit_params.copy()
|
||||
if params is not None:
|
||||
@@ -118,25 +118,24 @@ class spacy_NER(Pipeline):
|
||||
"losses": all_losses
|
||||
}
|
||||
|
||||
def evaluate(self, X, y, Xid):
|
||||
@overrides
|
||||
def evaluate(self, X: typing.Iterable[str], y, all_labels: typing.Iterable[str], **kwargs) -> EvaluationMetrics:
|
||||
train_data = self.format_data(X, y)
|
||||
all_labels = set()
|
||||
metrics = dict()
|
||||
metrics = EvaluationMetrics()
|
||||
# get all labels
|
||||
for text, annot in train_data:
|
||||
for ent in annot['entities']:
|
||||
all_labels.add(ent[2])
|
||||
all_labels = list(all_labels)
|
||||
stats = {}
|
||||
|
||||
for text, annots in train_data:
|
||||
pred_doc = self.__nlp(text)
|
||||
gold_doc = self.__nlp.make_doc(text)
|
||||
gold_labels = []
|
||||
|
||||
stats['Totals'] = [0,0,0,0]
|
||||
for label in all_labels:
|
||||
stats[label] = [0,0,0,0]
|
||||
metrics.labels[label] = StatMetrics()
|
||||
|
||||
for token in pred_doc:
|
||||
gold_labels.append(set())
|
||||
@@ -149,11 +148,9 @@ class spacy_NER(Pipeline):
|
||||
|
||||
goldParse = GoldParse(gold_doc, entities=annotations_for_label)
|
||||
for index, annotation in enumerate(goldParse.ner):
|
||||
|
||||
if annotation != 'O':
|
||||
gold_labels[index].add(annotation[2:])
|
||||
|
||||
|
||||
for index, pred_token in enumerate(pred_doc):
|
||||
pred_label = pred_token.ent_type_
|
||||
if pred_label != '':
|
||||
@@ -161,56 +158,33 @@ class spacy_NER(Pipeline):
|
||||
if label == pred_label:
|
||||
if label in gold_labels[index]:
|
||||
#TP
|
||||
stats[label][0] += 1
|
||||
stats['Totals'][0] += 1
|
||||
metrics.labels[label].tp += 1
|
||||
metrics.totals.tp += 1
|
||||
else:
|
||||
#FP
|
||||
stats[label][1] += 1
|
||||
stats['Totals'][1] += 1
|
||||
metrics.labels[label].fp += 1
|
||||
metrics.totals.fp += 1
|
||||
|
||||
else:
|
||||
#All other labels are true negative because the model can only predict one label per token
|
||||
#TN
|
||||
|
||||
stats[label][3] += 1
|
||||
stats['Totals'][3] += 1
|
||||
|
||||
metrics.labels[label].tn += 1
|
||||
metrics.totals.tn += 1
|
||||
|
||||
else:
|
||||
for label in all_labels:
|
||||
if label in gold_labels[index]:
|
||||
#FN
|
||||
stats[label][2] += 1
|
||||
stats['Totals'][2] += 1
|
||||
metrics.labels[label].fn += 1
|
||||
metrics.totals.fn += 1
|
||||
|
||||
else:
|
||||
#TN
|
||||
stats[label][3] += 1
|
||||
stats['Totals'][3] += 1
|
||||
metrics.labels[label].tn += 1
|
||||
metrics.totals.tn += 1
|
||||
|
||||
for key in stats:
|
||||
TP = stats[key][0]
|
||||
FP = stats[key][1]
|
||||
FN = stats[key][2]
|
||||
TN = stats[key][3]
|
||||
if (TP + FN) != 0:
|
||||
recall = TP / (TP + FN)
|
||||
else:
|
||||
recall = 1.0
|
||||
if (TP + FP) != 0:
|
||||
precision = TP / (TP + FP)
|
||||
else:
|
||||
precision = 0.0
|
||||
if (precision + recall) != 0:
|
||||
f1 = 2 * (precision * recall) / (precision + recall)
|
||||
else:
|
||||
f1 = 0
|
||||
if (TP + FN + FP + TN) != 0:
|
||||
acc = (TP + TN) / (TP + FN + FP + TN)
|
||||
else:
|
||||
acc = 0
|
||||
metrics[key] = {'precision': precision, 'recall': recall, 'f1': f1, 'TP': TP, 'FP': FP, 'FN': FN, "TN": TN,
|
||||
"acc": acc}
|
||||
metrics.calc_precision_recall_f1_acc()
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
@@ -356,14 +330,14 @@ class spacy_NER(Pipeline):
|
||||
|
||||
@overrides
|
||||
# TODO
|
||||
def next_example(self, X, Xid):
|
||||
def next_example(self, X: typing.Iterable[str], Xid):
|
||||
return
|
||||
|
||||
## EXTRA METHODS TO HELP WITH THE SPACY PIPELINE ##
|
||||
|
||||
# Takes input data and formats it to be easier to use in the spacy pipeline
|
||||
# ASSUMES DATA FOLLOWS FORMAT X = [string], y = [[(start offset, stop offset, label), ()], ... []]
|
||||
def format_data(self, X, y):
|
||||
def format_data(self, X: typing.Iterable[str], y):
|
||||
out = []
|
||||
for i, text in enumerate(X):
|
||||
out.append((text, {'entities': [(labels) for labels in y[i]]}))
|
||||
@@ -374,11 +348,11 @@ class spacy_NER(Pipeline):
            self.__ner.add_label(entity)

    @overrides
-   def save_model(self, model_name):
+   def save_model(self, model_name: str):
        self.__nlp.to_disk(model_name)
        logger.info('Saved model to ' + model_name)
        return model_name

    @overrides
-   def load_model(self, model_name):
+   def load_model(self, model_name: str):
        self._load_model(model_path=model_name)
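save_model/load_model wrap spaCy's on-disk model format: nlp.to_disk() writes a model directory that spacy.load() (or Language.from_disk()) can read back. A minimal roundtrip sketch; "my_model" is an illustrative path, and _load_model's internals are not shown in this diff:

import spacy

nlp = spacy.blank("en")
nlp.to_disk("my_model")        # what save_model() does via self.__nlp.to_disk
nlp2 = spacy.load("my_model")  # one way the pipeline could restore it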
@@ -161,6 +161,9 @@ def main():
    if not os.path.isfile(redis_start):
        lock_print("Couldn't find redis start script: {}.".format(redis_start))
        return 1

+   pipeline_dir = os.path.join(DIR, "pipelines")
+   pipeline_start = os.path.join(pipeline_dir, "dev_run.sh")
+
    backend_dir = os.path.join(DIR, "backend")
    backend_start = os.path.join(backend_dir, "dev_run.sh")
@@ -179,24 +182,21 @@ def main():
    if docker:
        frontend_annotation_start = [frontend_annotation_start, "--", "--host", "0.0.0.0"]

-   pipeline_dir = os.path.join(DIR, "pipelines")
-   pipeline_start = os.path.join(pipeline_dir, "dev_run.sh")
-
    eve_process = start_eve_process(eve_dir, eve_start)
    if not eve_only:
        redis_process = start_redis_process(redis_dir, redis_start)
+       pipeline_process = start_pipeline(pipeline_dir, pipeline_start)
        backend_process = start_backend_process(backend_dir, backend_start)
    if not eve_only and not backend_only:
        frontend_annotation_process = start_frontend_annotation_process(frontend_annotation_dir, frontend_annotation_start)
-       pipeline_process = start_pipeline(pipeline_dir, pipeline_start)

    def signal_handler(sig, frame):
        lock_print("")
        if not eve_only and not backend_only:
-           stop_pipeline(pipeline_process)
            stop_frontend_annotation_process(frontend_annotation_process)
        if not eve_only:
            stop_backend_process(backend_process)
+           stop_pipeline(pipeline_process)
            stop_redis_process(redis_process)
        stop_eve_process(eve_process)
        lock_print("")
@@ -39,7 +39,7 @@
      "labels": ["geo", "gpe", "per", "org", "tim", "art"],
      "metadata": {
        "title": "Small Collection",
-       "description": "This is a small collection"
+       "description": "This is a small collection using spaCy pipeline"
      },
      "archived": false,
      "configuration": {
@@ -156,5 +156,35 @@
        "text_column": 0
      }
    }
- }
+ },
+ {
+   "collection": {
+     "creator_id": "ada",
+     "annotators": ["ada"],
+     "viewers": ["ada"],
+     "labels": ["geo", "gpe", "per", "org", "tim", "art"],
+     "metadata": {
+       "title": "Small Collection Simpletransformers",
+       "description": "This is a small collection using Simpletransformers pipeline"
+     },
+     "archived": false,
+     "configuration": {
+       "allow_overlapping_ner_annotations": false
+     }
+   }, "classifier": {
+     "pipelineId": "5babb6ee4eb7dd2c39b96720",
+     "overlap": 0,
+     "train_every": 5,
+     "classifierParameters": {
+       "cutoff": 1,
+       "iterations": 5
+     }
+   }, "documents": {
+     "ner_annotations": {
+       "csv_file": "./ner_dataset.csv",
+       "sentences_per_doc": 5
+     },
+     "num_docs": 5
+   }
+ }
]
@@ -40,5 +40,15 @@
      "use_type_seqs2": [true, false],
      "use_type_y_sequences": [true, false]
    }
- }
+ },
+ {
+   "_id": "5babb6ee4eb7dd2c39b96720",
+   "title": "SimpleTransformers - Bio-ClinicalBERT",
+   "description": "SimpleTransformers models.",
+   "name": "simpletransformers",
+   "parameters": {
+     "training_batch_size": "integer",
+     "num_train_epochs": "integer"
+   }
+ }
]
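For context on the new pipeline entry: the title "SimpleTransformers - Bio-ClinicalBERT" suggests a simpletransformers NERModel wrapping emilyalsentzer/Bio_ClinicalBERT, with the two declared parameters presumably mapping onto simpletransformers' train_batch_size and num_train_epochs args. A hedged sketch of that mapping -- the actual service wiring is not part of this diff, and the label and arg values below are illustrative:

from simpletransformers.ner import NERModel

labels = ["O", "B-geo", "I-geo", "B-gpe", "I-gpe", "B-per", "I-per",
          "B-org", "I-org", "B-tim", "I-tim", "B-art", "I-art"]
model = NERModel(
    "bert", "emilyalsentzer/Bio_ClinicalBERT",
    labels=labels,
    args={"train_batch_size": 32, "num_train_epochs": 1},
    use_cuda=False,
)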
@@ -239,7 +239,18 @@ def test_train_and_predict_opennlp():
        [972, 976, 'gpe'], [1025, 1029, 'gpe'], [1089, 1096, 'geo'], [1113, 1120, 'gpe'],
        [1200, 1209, 'tim'], [1221, 1225, 'org']]

-def test_sync_train():
+def test_train_and_predict_simpletransformers():
+    prediction = _test_train_and_predict("Small Collection Simpletransformers")
+    assert len(prediction["doc"]) == 0
+    preds = prediction["ner"]
+    # unfortunately the simpletransformers predictions are not the same across runs
+    # and there don't seem to be guaranteed common tokens
+    # so just make sure any predictions have proper labels...
+    common_labels = {'gpe', 'org', 'geo', 'tim', 'per'}
+    for pred in preds:
+        assert pred[2] in common_labels
+
+def test_sync_train():
    client = common.login_with_test_user(common.client())

    collection = common.get_collection(client, "Small Collection OpenNLP")
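A hedged aside on the nondeterminism the new test works around: simpletransformers accepts a manual_seed model arg that makes runs more repeatable, though GPU kernels can still introduce variation, so tolerant assertions like the one above remain the safer choice for CI. Illustrative values only:

from simpletransformers.ner import NERModel

model = NERModel("bert", "bert-base-cased",
                 args={"manual_seed": 4}, use_cuda=False)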