{"id":"https://openalex.org/W7155389090","doi":"https://doi.org/10.48550/arxiv.2604.20146","title":"SAKE: Self-aware Knowledge Exploitation-Exploration for Grounded Multimodal Named Entity Recognition","display_name":"SAKE: Self-aware Knowledge Exploitation-Exploration for Grounded Multimodal Named Entity Recognition","publication_year":2026,"publication_date":"2026-04-22","ids":{"openalex":"https://openalex.org/W7155389090","doi":"https://doi.org/10.48550/arxiv.2604.20146"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.20146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.20146","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074009618","display_name":"Jielong Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tang, Jielong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134453550","display_name":"Xujie Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Xujie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134366033","display_name":"Jiayang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiayang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134456595","display_name":"Jianxing Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jianxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134390707","display_name":"Xiao Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134453241","display_name":"Lin Yee Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004590814","display_name":"Yunlai Teng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teng, Yunlai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134459916","display_name":"Shimin Di","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di, Shimin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134372493","display_name":"Jian Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5074009618"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7501999735832214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7501999735832214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1867000013589859,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.013299999758601189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.578499972820282},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5751000046730042},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4860999882221222},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4397999942302704},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.4120999872684479},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.39079999923706055},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.37770000100135803},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.3375999927520752},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.33640000224113464}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7986999750137329},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6014000177383423},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.578499972820282},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5751000046730042},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49729999899864197},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4860999882221222},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4397999942302704},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.4120999872684479},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.39079999923706055},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.37770000100135803},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3375999927520752},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C2780613888","wikidata":"https://www.wikidata.org/wiki/Q6423394","display_name":"Knowledge retrieval","level":3,"score":0.29829999804496765},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29330000281333923},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.20146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.20146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7725668549537659,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Grounded":[0],"Multimodal":[1,70],"Named":[2],"Entity":[3],"Recognition":[4],"(GMNER)":[5],"aims":[6],"to":[7,37,104,158,205,211],"extract":[8],"named":[9],"entities":[10],"and":[11,44,102,121,128,181],"localize":[12],"their":[13],"visual":[14],"regions":[15],"within":[16],"image-text":[17],"pairs,":[18],"serving":[19],"as":[20],"a":[21,137,170,194],"pivotal":[22],"capability":[23],"for":[24],"various":[25],"downstream":[26],"applications.":[27],"In":[28],"open-world":[29],"social":[30,227],"media":[31,228],"platforms,":[32],"GMNER":[33],"remains":[34],"challenging":[35],"due":[36],"the":[38,97,150,176,203],"prevalence":[39],"of":[40,100],"long-tailed,":[41],"rapidly":[42],"evolving,":[43],"unseen":[45],"entities.":[46],"To":[47,106],"tackle":[48],"this,":[49,108],"existing":[50],"approaches":[51],"typically":[52],"rely":[53],"on":[54,87,164,223],"either":[55],"external":[56,122],"knowledge":[57,64,98,119,123],"exploration":[58,124],"through":[59,154,184],"heuristic":[60,76],"retrieval":[61,77,217],"or":[62,81],"internal":[63,92,118],"exploitation":[65,93,120],"via":[66,125,136],"iterative":[67],"refinement":[68],"in":[69],"Large":[71],"Language":[72],"Models":[73],"(MLLMs).":[74],"However,":[75],"often":[78],"introduces":[79],"noisy":[80],"conflicting":[82],"evidence":[83],"that":[84,116,174,198],"degrades":[85],"precision":[86],"known":[88],"entities,":[89],"while":[90],"solely":[91],"is":[94,218],"constrained":[95],"by":[96],"boundaries":[99],"MLLMs":[101],"prone":[103],"hallucinations.":[105],"address":[107],"we":[109,142,167,188],"propose":[110,143],"SAKE,":[111],"an":[112],"end-to-end":[113],"agentic":[114,190],"framework":[115],"harmonizes":[117],"self-aware":[126,213],"reasoning":[127],"adaptive":[129],"search":[130,209],"tool":[131],"invocation.":[132],"We":[133],"implement":[134],"this":[135],"two-stage":[138],"training":[139],"paradigm.":[140],"First,":[141],"Difficulty-aware":[144],"Search":[145],"Tag":[146],"Generation,":[147],"which":[148],"quantifies":[149],"model's":[151],"entity-level":[152],"uncertainty":[153],"multiple":[155],"forward":[156],"samplings":[157],"produce":[159],"explicit":[160],"knowledge-gap":[161],"signals.":[162],"Based":[163],"these":[165],"signals,":[166],"construct":[168],"SAKE-SeCoT,":[169],"high-quality":[171],"Chain-of-Thought":[172],"dataset":[173],"equips":[175],"model":[177,204],"with":[178,193],"basic":[179],"self-awareness":[180],"tool-use":[182],"capabilities":[183],"supervised":[185],"fine-tuning.":[186],"Second,":[187],"employ":[189],"reinforcement":[191],"learning":[192],"hybrid":[195],"reward":[196],"function":[197],"penalizes":[199],"unnecessary":[200],"retrieval,":[201],"enabling":[202],"evolve":[206],"from":[207],"rigid":[208],"imitation":[210],"genuine":[212],"decision-making":[214],"about":[215],"when":[216],"truly":[219],"necessary.":[220],"Extensive":[221],"experiments":[222],"two":[224],"widely":[225],"used":[226],"benchmarks":[229],"demonstrate":[230],"SAKE's":[231],"effectiveness.":[232]},"counts_by_year":[],"updated_date":"2026-04-24T06:07:52.864757","created_date":"2026-04-24T00:00:00"}
