{"id":"https://openalex.org/W2891320405","doi":"https://doi.org/10.18653/v1/d18-2014","title":"A Multilingual Information Extraction Pipeline for Investigative Journalism","display_name":"A Multilingual Information Extraction Pipeline for Investigative Journalism","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2891320405","doi":"https://doi.org/10.18653/v1/d18-2014","mag":"2891320405"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d18-2014","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2014","pdf_url":"https://www.aclweb.org/anthology/D18-2014.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D18-2014.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068268204","display_name":"Gregor Wiedemann","orcid":"https://orcid.org/0000-0002-4239-295X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Gregor Wiedemann","raw_affiliation_strings":["Language Technology Group Department of Informatics Universitt Hamburg, Germany","Universit\u00e4t Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Language Technology Group Department of Informatics Universitt Hamburg, Germany","institution_ids":["https://openalex.org/I884043246"]},{"raw_affiliation_string":"Universit\u00e4t Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039544387","display_name":"Seid Muhie Yimam","orcid":"https://orcid.org/0000-0002-8289-388X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Seid Muhie Yimam","raw_affiliation_strings":["Universit\u00e4t Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021287757","display_name":"Chris Biemann","orcid":"https://orcid.org/0000-0002-8449-9624"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Chris Biemann","raw_affiliation_strings":["Universit\u00e4t Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5068268204"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":1.6424,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.88691054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"78","last_page":"83"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8063596487045288},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7731747031211853},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7562939524650574},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.6329461932182312},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5696896910667419},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5446909666061401},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5073742270469666},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49195143580436707},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.4621478021144867},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.46204090118408203},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44271320104599},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.12545806169509888},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08459481596946716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8063596487045288},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7731747031211853},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7562939524650574},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.6329461932182312},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5696896910667419},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5446909666061401},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5073742270469666},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49195143580436707},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.4621478021144867},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.46204090118408203},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44271320104599},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.12545806169509888},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08459481596946716},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/d18-2014","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2014","pdf_url":"https://www.aclweb.org/anthology/D18-2014.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1809.00221","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1809.00221","pdf_url":"https://arxiv.org/pdf/1809.00221","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2891320405","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1809.00221v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1809.00221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1809.00221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/d18-2014","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2014","pdf_url":"https://www.aclweb.org/anthology/D18-2014.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2891320405.pdf","grobid_xml":"https://content.openalex.org/works/W2891320405.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1517509400","https://openalex.org/W1984416789","https://openalex.org/W2027855569","https://openalex.org/W2096797897","https://openalex.org/W2135415614","https://openalex.org/W2144331291","https://openalex.org/W2250600805","https://openalex.org/W2251585411","https://openalex.org/W2511841151","https://openalex.org/W2963863756","https://openalex.org/W3123136864"],"related_works":["https://openalex.org/W2895194298","https://openalex.org/W2902861067","https://openalex.org/W2470027144","https://openalex.org/W1837359179","https://openalex.org/W2008431074","https://openalex.org/W2753477019","https://openalex.org/W1967308467","https://openalex.org/W2976964932","https://openalex.org/W2810160739","https://openalex.org/W1531887935","https://openalex.org/W2548150405","https://openalex.org/W2072747082","https://openalex.org/W3099826714","https://openalex.org/W2100278306","https://openalex.org/W1055246418","https://openalex.org/W1528050464","https://openalex.org/W2425612555","https://openalex.org/W805360218","https://openalex.org/W2400998926","https://openalex.org/W2572152885"],"abstract_inverted_index":{"We":[0],"introduce":[1],"an":[2],"advanced":[3],"information":[4],"extraction":[5,115,171],"pipeline":[6,24],"to":[7,65,99,125,164],"automatically":[8],"process":[9],"very":[10],"large":[11,48,60],"collections":[12],"of":[13,20,36,62,78,82,96,116,141,149,159,172],"unstructured":[14],"textual":[15],"data":[16,88],"for":[17,31,154],"the":[18,32,97,107,113,131,138],"purpose":[19],"investigative":[21,142],"journalism.":[22],"The":[23,52],"serves":[25],"as":[26],"a":[27,47,59,93],"new":[28],"input":[29],"processor":[30],"upcoming":[33],"major":[34,134],"release":[35],"our":[37],"New/s/leak":[38],"2.0":[39],"software,":[40],"which":[41],"we":[42,128],"develop":[43],"in":[44,106,121,144],"cooperation":[45],"with":[46],"German":[49],"news":[50],"organization.":[51],"use":[53,139],"case":[54,140],"is":[55,110],"that":[56],"journalists":[57],"receive":[58],"collection":[61,98],"files":[63],"up":[64,163],"several":[66],"Gigabytes":[67],"containing":[68],"unknown":[69],"contents.":[70],"Collections":[71],"may":[72],"originate":[73],"either":[74],"from":[75,177],"official":[76],"disclosures":[77],"documents,":[79],"e.g.":[80],"Freedom":[81],"Information":[83],"Act":[84],"requests,":[85],"or":[86],"unofficial":[87],"leaks.":[89],"Our":[90],"software":[91],"prepares":[92],"visually-aided":[94],"exploration":[95],"quickly":[100],"learn":[101],"about":[102],"potential":[103],"stories":[104],"contained":[105],"data.":[108],"It":[109],"based":[111],"on":[112,130],"automatic":[114],"entities":[117,176],"and":[118,169,175],"their":[119],"co-occurrence":[120],"documents.":[122],"In":[123],"contrast":[124],"comparable":[126],"projects,":[127],"focus":[129],"following":[132],"three":[133],"requirements":[135],"particularly":[136],"serving":[137],"journalism":[143],"cross-border":[145],"collaborations:":[146],"1)":[147],"composition":[148],"multiple":[150],"state-of-the-art":[151],"NLP":[152],"tools":[153],"entity":[155],"extraction,":[156],"2)":[157],"support":[158],"multi-lingual":[160],"document":[161],"sets":[162],"40":[165],"languages,":[166],"3)":[167],"fast":[168],"easyto-use":[170],"full-text,":[173],"metadata":[174],"various":[178],"file":[179],"formats.":[180]},"counts_by_year":[{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
