{"id":"https://openalex.org/W4403780729","doi":"https://doi.org/10.1145/3664647.3681473","title":"UNER: A Unified Prediction Head for Named Entity Recognition in Visually-rich Documents","display_name":"UNER: A Unified Prediction Head for Named Entity Recognition in Visually-rich Documents","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780729","doi":"https://doi.org/10.1145/3664647.3681473"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681473","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681473","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3664647.3681473","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050991415","display_name":"Yi Tu","orcid":"https://orcid.org/0000-0002-2184-4443"},"institutions":[{"id":"https://openalex.org/I4210087087","display_name":"Henan Tianguan Group (China)","ror":"https://ror.org/001rfde81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Tu","raw_affiliation_strings":["Tiansuan Security Lab, Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2184-4443","affiliations":[{"raw_affiliation_string":"Tiansuan Security Lab, Ant Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210087087"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chong Zhang","orcid":"https://orcid.org/0009-0002-6083-7593"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chong Zhang","raw_affiliation_strings":["Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-6083-7593","affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100750284","display_name":"Ya Guo","orcid":"https://orcid.org/0000-0002-9242-493X"},"institutions":[{"id":"https://openalex.org/I4210087087","display_name":"Henan Tianguan Group (China)","ror":"https://ror.org/001rfde81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ya Guo","raw_affiliation_strings":["Tiansuan Security Lab, Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-9242-493X","affiliations":[{"raw_affiliation_string":"Tiansuan Security Lab, Ant Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210087087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103443097","display_name":"Huan Chen","orcid":"https://orcid.org/0000-0002-7280-2956"},"institutions":[{"id":"https://openalex.org/I4210087087","display_name":"Henan Tianguan Group (China)","ror":"https://ror.org/001rfde81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huan Chen","raw_affiliation_strings":["Tiansuan Security Lab, Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7280-2956","affiliations":[{"raw_affiliation_string":"Tiansuan Security Lab, Ant Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210087087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111060246","display_name":"Jinyang Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087087","display_name":"Henan Tianguan Group (China)","ror":"https://ror.org/001rfde81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyang Tang","raw_affiliation_strings":["Tiansuan Security Lab, Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-5630-9756","affiliations":[{"raw_affiliation_string":"Tiansuan Security Lab, Ant Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210087087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101572850","display_name":"Huijia Zhu","orcid":"https://orcid.org/0009-0008-5784-7225"},"institutions":[{"id":"https://openalex.org/I4210087087","display_name":"Henan Tianguan Group (China)","ror":"https://ror.org/001rfde81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huijia Zhu","raw_affiliation_strings":["Tiansuan Security Lab, Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0008-5784-7225","affiliations":[{"raw_affiliation_string":"Tiansuan Security Lab, Ant Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210087087"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100360407","display_name":"Qi Zhang","orcid":"https://orcid.org/0000-0003-0947-4942"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Zhang","raw_affiliation_strings":["Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-0947-4942","affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6109,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74711305,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4890","last_page":"4898"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8018526434898376},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.6719193458557129},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5689358115196228},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5098656415939331},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3766305148601532}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8018526434898376},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.6719193458557129},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5689358115196228},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5098656415939331},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3766305148601532},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681473","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681473","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681473","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681473","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2270423142","https://openalex.org/W2953356739","https://openalex.org/W2986619406","https://openalex.org/W2997154779","https://openalex.org/W3003484198","https://openalex.org/W3093218477","https://openalex.org/W3104953317","https://openalex.org/W3132296545","https://openalex.org/W3173325518","https://openalex.org/W3175225269","https://openalex.org/W3176851559","https://openalex.org/W3176900753","https://openalex.org/W3194594797","https://openalex.org/W3205981739","https://openalex.org/W4221166835","https://openalex.org/W4221167941","https://openalex.org/W4226020328","https://openalex.org/W4226470037","https://openalex.org/W4285105124","https://openalex.org/W4304013646","https://openalex.org/W4304014014","https://openalex.org/W4385570166","https://openalex.org/W4385574075","https://openalex.org/W4386065837","https://openalex.org/W4386075596","https://openalex.org/W4386075876","https://openalex.org/W4389524198"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,64],"recognition":[1],"of":[2,74,85,98],"named":[3],"entities":[4,87],"in":[5,13,22,88,100,149],"visually-rich":[6],"documents":[7],"(VrD-NER)":[8],"plays":[9],"a":[10,44,72,110,142],"critical":[11],"role":[12],"various":[14,115],"real-world":[15],"scenarios":[16,153],"and":[17,34,77,124,151,154],"applications.":[18],"However,":[19],"the":[20,68,83,96,106,120,130,134],"research":[21],"VrD-NER":[23,62,69,116],"faces":[24],"three":[25],"major":[26],"challenges:":[27],"complex":[28],"document":[29,56,121],"layouts,":[30],"incorrect":[31],"reading":[32,78],"orders,":[33],"unsuitable":[35],"task":[36,70],"formulations.":[37],"To":[38],"address":[39],"these":[40],"challenges,":[41],"we":[42],"propose":[43],"query-aware":[45],"entity":[46,102,157],"extraction":[47,103,158],"head,":[48],"namely":[49],"UNER,":[50],"to":[51,58,118,133],"collaborate":[52],"with":[53],"existing":[54],"multi-modal":[55],"transformers":[57],"develop":[59],"more":[60],"robust":[61],"models.":[63],"UNER":[65,99,107],"head":[66,108],"considers":[67],"as":[71],"combination":[73],"sequence":[75],"labeling":[76],"order":[79],"prediction,":[80],"effectively":[81],"addressing":[82],"issues":[84],"discontinuous":[86],"documents.":[89],"Experimental":[90],"evaluations":[91],"on":[92,114],"diverse":[93],"datasets":[94,117],"demonstrate":[95],"effectiveness":[97],"improving":[101],"performance.":[104],"Moreover,":[105],"enables":[109],"supervised":[111],"pre-training":[112,131],"stage":[113,132],"enhance":[119],"transformer":[122],"backbones":[123],"exhibits":[125,155],"substantial":[126],"knowledge":[127],"transfer":[128],"from":[129],"fine-tuning":[135],"stage.":[136],"By":[137],"incorporating":[138],"universal":[139],"layout":[140],"understanding,":[141],"pre-trained":[143],"UNER-based":[144],"model":[145],"demonstrates":[146],"significant":[147],"advantages":[148],"few-shot":[150],"cross-linguistic":[152],"zero-shot":[156],"abilities.":[159]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
