{"id":"https://openalex.org/W4389888963","doi":"https://doi.org/10.1109/sibgrapi59091.2023.10347174","title":"NBID Dataset: Towards Robust Information Extraction in Official Documents","display_name":"NBID Dataset: Towards Robust Information Extraction in Official Documents","publication_year":2023,"publication_date":"2023-11-06","ids":{"openalex":"https://openalex.org/W4389888963","doi":"https://doi.org/10.1109/sibgrapi59091.2023.10347174"},"language":"en","primary_location":{"id":"doi:10.1109/sibgrapi59091.2023.10347174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sibgrapi59091.2023.10347174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 36th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092749484","display_name":"Lucas Wojcik","orcid":null},"institutions":[{"id":"https://openalex.org/I52418104","display_name":"Universidade Federal do Paran\u00e1","ror":"https://ror.org/05syd6y78","country_code":"BR","type":"education","lineage":["https://openalex.org/I52418104"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Lucas Wojcik","raw_affiliation_strings":["Federal University of Paran&#x00E1;,Depart. of Informatics,Curitiba,PR,Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Paran&#x00E1;,Depart. of Informatics,Curitiba,PR,Brazil","institution_ids":["https://openalex.org/I52418104"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013645472","display_name":"Luiz Coelho","orcid":"https://orcid.org/0000-0002-0561-0612"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luiz Coelho","raw_affiliation_strings":["unico - idTech,Brazil","unico - idTech, Brazil"],"affiliations":[{"raw_affiliation_string":"unico - idTech,Brazil","institution_ids":[]},{"raw_affiliation_string":"unico - idTech, Brazil","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066324951","display_name":"Roger Granada","orcid":"https://orcid.org/0000-0001-5908-9247"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roger Granada","raw_affiliation_strings":["unico - idTech,Brazil","unico - idTech, Brazil"],"affiliations":[{"raw_affiliation_string":"unico - idTech,Brazil","institution_ids":[]},{"raw_affiliation_string":"unico - idTech, Brazil","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088097254","display_name":"Gustavo F\u00fchr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gustavo F\u00fchr","raw_affiliation_strings":["unico - idTech,Brazil","unico - idTech, Brazil"],"affiliations":[{"raw_affiliation_string":"unico - idTech,Brazil","institution_ids":[]},{"raw_affiliation_string":"unico - idTech, Brazil","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033489756","display_name":"David Menotti","orcid":"https://orcid.org/0000-0003-2430-2030"},"institutions":[{"id":"https://openalex.org/I52418104","display_name":"Universidade Federal do Paran\u00e1","ror":"https://ror.org/05syd6y78","country_code":"BR","type":"education","lineage":["https://openalex.org/I52418104"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"David Menotti","raw_affiliation_strings":["Federal University of Paran&#x00E1;,Depart. of Informatics,Curitiba,PR,Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Paran&#x00E1;,Depart. of Informatics,Curitiba,PR,Brazil","institution_ids":["https://openalex.org/I52418104"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5092749484"],"corresponding_institution_ids":["https://openalex.org/I52418104"],"apc_list":null,"apc_paid":null,"fwci":0.123,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.44938563,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"145","last_page":"150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8550132513046265},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6142441630363464},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5997960567474365},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.4980776309967041},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4812597930431366},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.46913573145866394},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.45642372965812683},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44760411977767944},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2679626941680908},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07066339254379272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8550132513046265},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6142441630363464},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5997960567474365},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.4980776309967041},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4812597930431366},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.46913573145866394},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45642372965812683},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44760411977767944},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2679626941680908},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07066339254379272},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sibgrapi59091.2023.10347174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sibgrapi59091.2023.10347174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 36th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2738588019","https://openalex.org/W2915716523","https://openalex.org/W2922714365","https://openalex.org/W2963341956","https://openalex.org/W2964346820","https://openalex.org/W2997154779","https://openalex.org/W2999905431","https://openalex.org/W3003711898","https://openalex.org/W3034999214","https://openalex.org/W3104953317","https://openalex.org/W3119202513","https://openalex.org/W3128803091","https://openalex.org/W3138516171","https://openalex.org/W3163650427","https://openalex.org/W3176664887","https://openalex.org/W3176851559","https://openalex.org/W3201833923","https://openalex.org/W3205981739","https://openalex.org/W4304013646","https://openalex.org/W4312233877","https://openalex.org/W4313163001","https://openalex.org/W6684191040","https://openalex.org/W6778883912","https://openalex.org/W6787566904","https://openalex.org/W6788623665","https://openalex.org/W6809319315"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2094012830","https://openalex.org/W187246281","https://openalex.org/W2079194830"],"abstract_inverted_index":{"The":[0],"Visual":[1],"Document":[2],"Understanding":[3],"(VDU)":[4],"task":[5],"is":[6,155],"of":[7,13,21,32,40,128],"great":[8],"interest":[9],"for":[10,46,99,117,149],"a":[11,71,92],"variety":[12],"organizations,":[14],"including":[15],"banks,":[16],"governments":[17],"and":[18,53,68,95,115,141,145],"schools,":[19],"all":[20],"which":[22,80],"would":[23],"benefit":[24,158],"from":[25,30],"reliable":[26],"automatic":[27],"information":[28],"extraction":[29],"pictures":[31],"documents.":[33],"However,":[34],"due":[35],"to":[36,56,77,124,157,165],"the":[37,41,61,74,96,120,126,129,136,147,166],"sensitive":[38],"nature":[39],"data,":[42],"creating":[43],"new":[44,93],"datasets":[45],"official":[47],"documents,":[48],"such":[49],"as":[50,60,162],"identity":[51],"cards":[52],"passports,":[54],"proves":[55],"be":[57,65,78],"very":[58],"challenging":[59],"data":[62,131,143],"must":[63],"first":[64],"safely":[66],"anonymized":[67],"synthesized.":[69],"Such":[70],"process":[72,154],"requires":[73],"source":[75],"images":[76],"modified,":[79],"may":[81],"impact":[82,127],"performance":[83],"on":[84,119,132],"VDU":[85,111],"models.":[86],"In":[87],"this":[88],"paper,":[89],"we":[90],"propose":[91],"dataset":[94],"synthesizer":[97],"used":[98,161],"its":[100],"generation,":[101],"both":[102,139,150],"made":[103],"publicly":[104],"available.":[105],"We":[106,134],"also":[107],"selected":[108],"three":[109],"state-of-the-art":[110],"models:":[112],"PICK,":[113],"StrucTexT,":[114],"DocFormer,":[116],"evaluation":[118],"dataset,":[121],"in":[122],"order":[123],"study":[125],"synthetic":[130],"performance.":[133],"trained":[135],"models":[137],"using":[138],"synthetic-only":[140],"synthetic-plus-real":[142],"protocols":[144],"present":[146],"results":[148],"datasets.":[151],"Our":[152],"synthesizing":[153],"shown":[156],"training":[159],"when":[160],"an":[163],"addition":[164],"real":[167],"data.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
