{"id":"https://openalex.org/W3201693581","doi":"https://doi.org/10.1007/978-3-030-86549-8_36","title":"Kleister: Key Information Extraction Datasets Involving Long Documents with Complex Layouts","display_name":"Kleister: Key Information Extraction Datasets Involving Long Documents with Complex Layouts","publication_year":2021,"publication_date":"2021-05-12","ids":{"openalex":"https://openalex.org/W3201693581","doi":"https://doi.org/10.1007/978-3-030-86549-8_36","mag":"3201693581"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2105.05796","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.05796","pdf_url":"https://arxiv.org/pdf/2105.05796","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"book-chapter","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2105.05796","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054685387","display_name":"Tomasz Stanis\u0142awek","orcid":"https://orcid.org/0000-0003-1046-7563"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Stanis\u0142awek, Tomasz","raw_affiliation_strings":["Warsaw University of Technology, Warsaw, Poland","Applica.ai, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]},{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077188385","display_name":"Filip Grali\u0144ski","orcid":"https://orcid.org/0000-0001-8066-4533"},"institutions":[{"id":"https://openalex.org/I59411706","display_name":"Adam Mickiewicz University in Pozna\u0144","ror":"https://ror.org/04g6bbq64","country_code":"PL","type":"education","lineage":["https://openalex.org/I59411706"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Grali\u0144ski, Filip","raw_affiliation_strings":["Adam Mickiewicz University, Poznan, Poland","Applica.ai, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adam Mickiewicz University, Poznan, Poland","institution_ids":["https://openalex.org/I59411706"]},{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031984813","display_name":"Anna Wr\u00f3blewska","orcid":"https://orcid.org/0000-0002-3407-7570"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Wr\u00f3blewska, Anna","raw_affiliation_strings":["Warsaw University of Technology, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069264762","display_name":"Dawid Lipi\u0144ski","orcid":"https://orcid.org/0000-0003-0156-605X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lipi\u0144ski, Dawid","raw_affiliation_strings":["Applica.ai, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028668367","display_name":"Agnieszka Kaliska","orcid":"https://orcid.org/0000-0003-2856-8901"},"institutions":[{"id":"https://openalex.org/I59411706","display_name":"Adam Mickiewicz University in Pozna\u0144","ror":"https://ror.org/04g6bbq64","country_code":"PL","type":"education","lineage":["https://openalex.org/I59411706"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Kaliska, Agnieszka","raw_affiliation_strings":["Adam Mickiewicz University, Poznan, Poland","Applica.ai, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adam Mickiewicz University, Poznan, Poland","institution_ids":["https://openalex.org/I59411706"]},{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013527758","display_name":"Paulina Rosalska","orcid":"https://orcid.org/0000-0002-0231-5933"},"institutions":[{"id":"https://openalex.org/I3019271933","display_name":"Nicolaus Copernicus University","ror":"https://ror.org/0102mm775","country_code":"PL","type":"education","lineage":["https://openalex.org/I3019271933"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Rosalska, Paulina","raw_affiliation_strings":["Applica.ai, Warsaw, Poland","Nicolaus Copernicus University, Torun, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]},{"raw_affiliation_string":"Nicolaus Copernicus University, Torun, Poland","institution_ids":["https://openalex.org/I3019271933"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044886432","display_name":"Bartosz Topolski","orcid":"https://orcid.org/0000-0002-7126-9424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Topolski, Bartosz","raw_affiliation_strings":["Applica.ai, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Applica.ai, Warsaw, Poland","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049061860","display_name":"Przemys\u0142aw Biecek","orcid":"https://orcid.org/0000-0001-8423-1823"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]},{"id":"https://openalex.org/I4210128738","display_name":"Samsung (Poland)","ror":"https://ror.org/0381acm07","country_code":"PL","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210128738"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Biecek, Przemys\u0142aw","raw_affiliation_strings":["Warsaw University of Technology, Warsaw, Poland","Samsung R&D Institute Poland, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]},{"raw_affiliation_string":"Samsung R&D Institute Poland, Warsaw, Poland","institution_ids":["https://openalex.org/I4210128738"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.5768,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.97226802,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.895258367061615},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.7060964107513428},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.7045176029205322},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.6604940891265869},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5749549269676208},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5730518102645874},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5635107159614563},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5524067282676697},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5015377998352051},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4757777750492096},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40121209621429443}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.895258367061615},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.7060964107513428},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.7045176029205322},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.6604940891265869},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5749549269676208},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5730518102645874},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5635107159614563},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5524067282676697},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5015377998352051},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4757777750492096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40121209621429443},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"pmh:oai:arXiv.org:2105.05796","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.05796","pdf_url":"https://arxiv.org/pdf/2105.05796","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2105.05796","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.05796","pdf_url":"https://arxiv.org/pdf/2105.05796","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2144578941","https://openalex.org/W2510759893","https://openalex.org/W2880875857","https://openalex.org/W2891117443","https://openalex.org/W2893624139","https://openalex.org/W2896457183","https://openalex.org/W2919096156","https://openalex.org/W2922714365","https://openalex.org/W2963687456","https://openalex.org/W2964110616","https://openalex.org/W2965373594","https://openalex.org/W2986619406","https://openalex.org/W3000758063","https://openalex.org/W3003261556","https://openalex.org/W3006883036","https://openalex.org/W3015468748","https://openalex.org/W3103769805","https://openalex.org/W3104244947","https://openalex.org/W3104953317","https://openalex.org/W3105607850","https://openalex.org/W3120043490","https://openalex.org/W3204562006"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126","https://openalex.org/W2043093291","https://openalex.org/W2363545964"],"abstract_inverted_index":{"The":[0,85,107,151],"relevance":[1],"of":[2,53,75,90,95],"the":[3,131,163,167,173],"Key":[4],"Information":[5],"Extraction":[6],"(KIE)":[7],"task":[8],"is":[9,67],"increasingly":[10],"important":[11],"in":[12,32],"natural":[13],"language":[14],"processing":[15],"problems.":[16],"But":[17],"there":[18],"are":[19],"still":[20],"only":[21],"a":[22,51,145],"few":[23],"well-defined":[24],"problems":[25],"that":[26,141],"serve":[27],"as":[28],"benchmarks":[29],"for":[30],"solutions":[31],"this":[33,37],"area.":[34],"To":[35],"bridge":[36],"gap,":[38],"we":[39],"introduce":[40],"two":[41],"new":[42],"datasets":[43,143,174],"(Kleister":[44],"NDA":[45,109,165],"and":[46,55,81,102,119,157,166,181],"Kleister":[47,86,108,164,168],"Charity).":[48],"They":[49],"involve":[50],"mix":[52],"scanned":[54],"born-digital":[56],"long":[57],"formal":[58],"English-language":[59],"documents.":[60],"In":[61],"these":[62],"datasets,":[63],"an":[64,155,158],"NLP":[65],"system":[66],"expected":[68],"to":[69,105,122,148,175],"find":[70],"or":[71],"infer":[72],"various":[73],"types":[74],"entities":[76,104,121],"by":[77],"employing":[78],"both":[79],"textual":[80],"structural":[82],"layout":[83],"features.":[84],"Charity":[87,169],"dataset":[88,110],"consists":[89],"2,788":[91],"annual":[92],"financial":[93],"reports":[94],"charity":[96],"organizations,":[97],"with":[98,115],"61,643":[99],"unique":[100,117],"pages":[101,118],"21,612":[103],"extract.":[106,123],"has":[111],"540":[112],"Non-disclosure":[113],"Agreements,":[114],"3,229":[116],"2,160":[120],"We":[124,171],"provide":[125],"several":[126],"state-of-the-art":[127],"baseline":[128],"systems":[129],"from":[130],"KIE":[132],"domain":[133],"(Flair,":[134],"BERT,":[135],"RoBERTa,":[136],"LayoutLM,":[137],"LAMBERT),":[138],"which":[139],"show":[140],"our":[142],"pose":[144],"strong":[146],"challenge":[147],"existing":[149],"models.":[150],"best":[152],"model":[153],"achieved":[154],"81.77%":[156],"83.57%":[159],"F1-score":[160],"on":[161,178],"respectively":[162],"datasets.":[170],"share":[172],"encourage":[176],"progress":[177],"more":[179],"in-depth":[180],"complex":[182],"information":[183],"extraction":[184],"tasks.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2021-10-11T00:00:00"}
