{"id":"https://openalex.org/W4399418562","doi":"https://doi.org/10.1145/3652583.3658011","title":"Contrastive Pre-training with Multi-level Alignment for Grounded Multimodal Named Entity Recognition","display_name":"Contrastive Pre-training with Multi-level Alignment for Grounded Multimodal Named Entity Recognition","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399418562","doi":"https://doi.org/10.1145/3652583.3658011"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658011","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658011","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658011","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658011","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050860447","display_name":"Xigang Bao","orcid":"https://orcid.org/0009-0002-3250-2403"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xigang Bao","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099043634","display_name":"Mengyuan Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengyuan Tian","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Luyao Wang","orcid":"https://orcid.org/0009-0006-0565-3838"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Luyao Wang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhiyuan Zha","orcid":"https://orcid.org/0000-0001-8702-4088"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Zha","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102713872","display_name":"Biao Qin","orcid":"https://orcid.org/0000-0002-4304-675X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Biao Qin","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5050860447"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":1.6963,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86102418,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"795","last_page":"803"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8349338173866272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6214047074317932},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6070918440818787},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5020251274108887},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4567060172557831},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3593413233757019}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8349338173866272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6214047074317932},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6070918440818787},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5020251274108887},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4567060172557831},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3593413233757019},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3658011","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658011","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658011","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658011","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658011","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658011","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6350159928","display_name":null,"funder_award_id":"61772534","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322499","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399418562.pdf","grobid_xml":"https://content.openalex.org/works/W4399418562.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W2745461083","https://openalex.org/W2788647998","https://openalex.org/W2798298921","https://openalex.org/W2963109634","https://openalex.org/W3035448883","https://openalex.org/W3092692431","https://openalex.org/W3127151332","https://openalex.org/W3173220247","https://openalex.org/W3174525637","https://openalex.org/W3176858586","https://openalex.org/W3207972321","https://openalex.org/W4205509257","https://openalex.org/W4212998232","https://openalex.org/W4229024390","https://openalex.org/W4293518017","https://openalex.org/W4304015092","https://openalex.org/W4306820534","https://openalex.org/W4312933868","https://openalex.org/W4321488427","https://openalex.org/W4382466550","https://openalex.org/W4385570368","https://openalex.org/W4385571080","https://openalex.org/W4387848810","https://openalex.org/W4387968001","https://openalex.org/W4391164085","https://openalex.org/W6600769105","https://openalex.org/W6607167723"],"related_works":["https://openalex.org/W3199871245","https://openalex.org/W3005759282","https://openalex.org/W3017222382","https://openalex.org/W3128216712","https://openalex.org/W3204019825","https://openalex.org/W3136915866","https://openalex.org/W4390279576","https://openalex.org/W2886890203","https://openalex.org/W4313535650","https://openalex.org/W2287770975"],"abstract_inverted_index":{"Recently,":[0],"Grounded":[1],"Multimodal":[2,14],"Named":[3,15],"Entity":[4,16],"Recognition":[5,17],"(GM-NER)":[6],"task":[7],"has":[8],"been":[9],"introduced":[10],"to":[11,36,41,43,97,132,160],"refine":[12],"the":[13,48,51,78,130,158,168,175,187],"(MNER)":[18],"task.Existing":[19],"MNER":[20],"studies":[21],"fall":[22],"short":[23],"in":[24,59,71],"that":[25,128,142,154,179],"they":[26],"merely":[27],"focus":[28],"on":[29,157],"extracting":[30],"text-based":[31],"entity-type":[32],"pairs,":[33],"often":[34,143],"leading":[35],"entity":[37,63,116,206],"ambiguities":[38],"and":[39,65,80,94],"failing":[40],"contribute":[42],"multimodal":[44,102,170,205],"knowledge":[45],"graph":[46],"construction.In":[47],"GMNER":[49,176],"task,":[50],"objective":[52],"becomes":[53],"more":[54,133],"challenging:":[55],"identifying":[56],"named":[57],"entities":[58,136],"text,":[60],"determining":[61],"their":[62,67,204],"types,":[64],"locating":[66],"corresponding":[68,163],"bounding":[69],"boxes":[70],"linked":[72],"images,":[73,111],"necessitating":[74],"precise":[75],"alignment":[76,87],"between":[77,101],"textual":[79,115,138],"visual":[81,164],"information.We":[82],"introduce":[83],"a":[84,150,162],"novel":[85],"multi-level":[86],"pre-training":[88,191],"method,":[89],"engaging":[90],"with":[91,114],"both":[92],"text-image":[93],"entity-object":[95],"dimensions":[96],"foster":[98],"deeper":[99],"congruence":[100],"data.Specifically,":[103],"we":[104],"innovatively":[105],"harness":[106],"potential":[107],"objects":[108],"identified":[109],"within":[110],"aligning":[112],"them":[113],"prompts,":[117],"thereby":[118],"generating":[119],"refined":[120],"soft":[121],"pseudolabels.These":[122],"labels":[123],"serve":[124],"as":[125],"self-supervised":[126],"signals":[127],"pre-train":[129],"model":[131,153],"accurately":[134],"extract":[135],"from":[137,174],"input.To":[139],"address":[140],"misalignments":[141],"plague":[144],"modality":[145],"integration,":[146],"our":[147,180,190],"method":[148],"employs":[149],"sophisticated":[151],"diffusion":[152],"performs":[155],"back-translation":[156],"text":[159],"generate":[161],"representation,":[165],"thus":[166],"refining":[167],"model's":[169],"interpretative":[171],"accuracy.Empirical":[172],"evidence":[173],"dataset":[177],"validates":[178],"approach":[181],"significantly":[182],"outperforms":[183],"existing":[184],"state-of-theart":[185],"models.Moreover,":[186],"versatility":[188],"of":[189],"process":[192],"complements":[193],"virtually":[194],"all":[195],"extant":[196],"models,":[197],"offering":[198],"an":[199],"additional":[200],"avenue":[201],"for":[202],"augmenting":[203],"recognition":[207],"acumen.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
