{"id":"https://openalex.org/W4384636790","doi":"https://doi.org/10.1145/3539618.3591886","title":"Form-NLU: Dataset for the Form Natural Language Understanding","display_name":"Form-NLU: Dataset for the Form Natural Language Understanding","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4384636790","doi":"https://doi.org/10.1145/3539618.3591886"},"language":"en","primary_location":{"id":"doi:10.1145/3539618.3591886","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074613052","display_name":"Yihao Ding","orcid":"https://orcid.org/0000-0001-5065-6911"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yihao Ding","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0001-5065-6911","affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032358151","display_name":"Siqu Long","orcid":"https://orcid.org/0000-0003-0590-7587"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Siqu Long","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0590-7587","affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiabin Huang","orcid":"https://orcid.org/0009-0003-7978-656X"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jiabin Huang","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0003-7978-656X","affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102795708","display_name":"Kaixuan Ren","orcid":"https://orcid.org/0009-0006-5547-5240"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kaixuan Ren","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0006-5547-5240","affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078816647","display_name":"Xingxiang Luo","orcid":"https://orcid.org/0009-0003-9024-3144"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xingxiang Luo","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0003-9024-3144","affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038455910","display_name":"Hyunsuk Chung","orcid":"https://orcid.org/0000-0001-5272-3778"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyunsuk Chung","raw_affiliation_strings":["FortifyEdge, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0001-5272-3778","affiliations":[{"raw_affiliation_string":"FortifyEdge, Sydney, NSW, Australia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084419965","display_name":"Soyeon Caren Han","orcid":"https://orcid.org/0000-0002-1948-6819"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"The University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Soyeon Caren Han","raw_affiliation_strings":["The University of Western Australia, Perth, WA, Australia"],"raw_orcid":"https://orcid.org/0000-0002-1948-6819","affiliations":[{"raw_affiliation_string":"The University of Western Australia, Perth, WA, Australia","institution_ids":["https://openalex.org/I177877127"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3397,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.82960512,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2807","last_page":"2816"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8252571821212769},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6530259847640991},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5822607278823853},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5463687181472778},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5432811975479126},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5131633877754211},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.4972439110279083},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4702809154987335},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.41694384813308716},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4070166349411011},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40439143776893616},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3885214924812317},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16751542687416077}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8252571821212769},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6530259847640991},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5822607278823853},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5463687181472778},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5432811975479126},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5131633877754211},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.4972439110279083},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4702809154987335},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.41694384813308716},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4070166349411011},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40439143776893616},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3885214924812317},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16751542687416077},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3539618.3591886","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/b82bbac8-426a-43ad-8a7e-e1d50e7d2988","is_oa":false,"landing_page_url":"https://research-repository.uwa.edu.au/en/publications/b82bbac8-426a-43ad-8a7e-e1d50e7d2988","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ding, Y, Long, S, Huang, J, Ren, K, Luo, X, Chung, H & Han, S C 2023, Form-NLU: Dataset for the Form Natural Language Understanding. in SIGIR '23: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR 2023 - Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, Association for Computing Machinery (ACM), pp. 2807-2816. https://doi.org/10.1145/3539618.3591886","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2263810708","https://openalex.org/W2740887992","https://openalex.org/W2963150697","https://openalex.org/W2986619406","https://openalex.org/W2997154779","https://openalex.org/W3003711898","https://openalex.org/W3034336960","https://openalex.org/W3104953317","https://openalex.org/W3107064625","https://openalex.org/W3113753692","https://openalex.org/W3118722740","https://openalex.org/W3120043490","https://openalex.org/W3121289818","https://openalex.org/W3132296545","https://openalex.org/W3173585224","https://openalex.org/W3176851559","https://openalex.org/W3201693581","https://openalex.org/W4221167941","https://openalex.org/W4281641249","https://openalex.org/W4285105124","https://openalex.org/W4304013646","https://openalex.org/W6600751047"],"related_works":["https://openalex.org/W2732813147","https://openalex.org/W2143460112","https://openalex.org/W842810586","https://openalex.org/W4319940250","https://openalex.org/W2352298027","https://openalex.org/W2042906257","https://openalex.org/W2888033806","https://openalex.org/W2092919065","https://openalex.org/W3138801416","https://openalex.org/W4236762297"],"abstract_inverted_index":{"Compared":[0],"to":[1],"general":[2],"document":[3,7],"analysis":[4],"tasks,":[5],"form":[6,25,30,36,41,50,58,66,81,93,108,111,126,135,147,165],"structure":[8,31,82],"understanding":[9,83],"and":[10,32,34,62,84,87,96,113,115,119,130,137,144,186,199],"retrieval":[11],"are":[12,16],"challenging.":[13],"Form":[14],"documents":[15],"typically":[17],"made":[18],"by":[19],"two":[20],"types":[21,183],"of":[22,99,106,184],"authors;":[23],"A":[24],"designer,":[26],"who":[27,38],"develops":[28],"the":[29,45,49,57,76,92,97,164,170,176,193],"keys,":[33],"a":[35,65,141],"user,":[37],"fills":[39],"out":[40],"values":[42,51],"based":[43],"on":[44,102,175],"provided":[46],"keys.":[47,187],"Hence,":[48],"may":[52],"not":[53],"be":[54],"aligned":[55],"with":[56,192],"designer's":[59,94],"intention":[60],"(structure":[61],"keys)":[63],"if":[64],"user":[67],"gets":[68],"confused.":[69],"In":[70],"this":[71,153],"paper,":[72],"we":[73,156,189],"introduce":[74],"Form-NLU,":[75,155],"first":[77,157],"novel":[78],"dataset":[79,122],"for":[80,163,181],"its":[85,201],"key":[86,171],"value":[88,101],"information":[89,149,172],"extraction,":[90],"interpreting":[91],"intent":[95],"alignment":[98],"user-written":[100],"it.":[103],"It":[104],"consists":[105],"857":[107],"images,":[109],"6k":[110],"keys":[112,118],"values,":[114],"4k":[116],"table":[117],"values.":[120],"Our":[121],"also":[123],"includes":[124],"three":[125],"types:":[127],"digital,":[128],"printed,":[129],"handwritten,":[131],"which":[132],"cover":[133],"diverse":[134],"appearances":[136],"layouts.":[138],"We":[139],"propose":[140],"robust":[142],"positional":[143],"logical":[145],"relation-based":[146],"key-value":[148],"extraction":[150,173,197],"framework.":[151],"Using":[152],"dataset,":[154,177],"examine":[158,190],"strong":[159],"object":[160],"detection":[161],"models":[162],"layout":[166,196],"understanding,":[167],"then":[168],"evaluate":[169],"task":[174],"providing":[178],"fine-grained":[179],"results":[180],"different":[182],"forms":[185],"Furthermore,":[188],"it":[191],"off-the-shelf":[194],"pdf":[195],"tool":[198],"prove":[200],"feasibility":[202],"in":[203],"real-world":[204],"cases.":[205]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-18T10:00:31.954636","created_date":"2025-10-10T00:00:00"}
