{"id":"https://openalex.org/W4387623721","doi":"https://doi.org/10.1109/tim.2023.3324362","title":"Visual Grounding With Joint Multimodal Representation and Interaction","display_name":"Visual Grounding With Joint Multimodal Representation and Interaction","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387623721","doi":"https://doi.org/10.1109/tim.2023.3324362"},"language":"en","primary_location":{"id":"doi:10.1109/tim.2023.3324362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tim.2023.3324362","pdf_url":null,"source":{"id":"https://openalex.org/S10892749","display_name":"IEEE Transactions on Instrumentation and Measurement","issn_l":"0018-9456","issn":["0018-9456","1557-9662"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Instrumentation and Measurement","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002250162","display_name":"Hong Zhu","orcid":"https://orcid.org/0000-0002-2841-7129"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hong Zhu","raw_affiliation_strings":["College of Electronic Engineering, National University of Defense Technology, Hefei, China","College of Electronic Engineering, National University of Detense Technology, Hefei, China","Anhui Key Laboratory of Polarization Imaging Detection Technology, Army Artillery and Air Defense Academy of PLA, Hefei, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic Engineering, National University of Defense Technology, Hefei, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Electronic Engineering, National University of Detense Technology, Hefei, China","institution_ids":[]},{"raw_affiliation_string":"Anhui Key Laboratory of Polarization Imaging Detection Technology, Army Artillery and Air Defense Academy of PLA, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070870347","display_name":"Qingyang Lu","orcid":"https://orcid.org/0009-0005-8005-7182"},"institutions":[{"id":"https://openalex.org/I4210115169","display_name":"Second Artillery General Hospital of Chinese People's Liberation Army","ror":"https://ror.org/0264qnp36","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210115169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Lu","raw_affiliation_strings":["Army Artillery and Air Defense Academy of PLA, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Army Artillery and Air Defense Academy of PLA, Hefei, China","institution_ids":["https://openalex.org/I4210115169"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115444850","display_name":"Lei Xue","orcid":"https://orcid.org/0009-0002-7355-9668"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xue","raw_affiliation_strings":["College of Electronic Engineering, National University of Defense Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic Engineering, National University of Defense Technology, Hefei, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082531343","display_name":"Mengzhen Xue","orcid":"https://orcid.org/0009-0005-0368-0210"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mogen Xue","raw_affiliation_strings":["Anhui Key Laboratory of Polarization Imaging Detection Technology, Army Artillery and Air Defense Academy of PLA, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Anhui Key Laboratory of Polarization Imaging Detection Technology, Army Artillery and Air Defense Academy of PLA, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102718971","display_name":"Guanglin Yuan","orcid":"https://orcid.org/0009-0008-7591-943X"},"institutions":[{"id":"https://openalex.org/I4210115169","display_name":"Second Artillery General Hospital of Chinese People's Liberation Army","ror":"https://ror.org/0264qnp36","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210115169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanglin Yuan","raw_affiliation_strings":["Army Artillery and Air Defense Academy of PLA, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Army Artillery and Air Defense Academy of PLA, Hefei, China","institution_ids":["https://openalex.org/I4210115169"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058101262","display_name":"Bineng Zhong","orcid":"https://orcid.org/0000-0003-3423-1539"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bineng Zhong","raw_affiliation_strings":["School of Computer Science and Engineering, Guangxi Normal University, Guilin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Guangxi Normal University, Guilin, China","institution_ids":["https://openalex.org/I29739308"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002250162"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.6727,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86526415,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"72","issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.750557541847229},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6267997026443481},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5473549365997314},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4853876829147339},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.48293372988700867},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4769681394100189},{"id":"https://openalex.org/keywords/rotation-formalisms-in-three-dimensions","display_name":"Rotation formalisms in three dimensions","score":0.47488921880722046},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.44370728731155396},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.42732882499694824},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4197345972061157},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3838275969028473},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3206210136413574},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13628944754600525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.750557541847229},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6267997026443481},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5473549365997314},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4853876829147339},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.48293372988700867},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4769681394100189},{"id":"https://openalex.org/C171018156","wikidata":"https://www.wikidata.org/wiki/Q7370306","display_name":"Rotation formalisms in three dimensions","level":2,"score":0.47488921880722046},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.44370728731155396},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.42732882499694824},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4197345972061157},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3838275969028473},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3206210136413574},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13628944754600525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tim.2023.3324362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tim.2023.3324362","pdf_url":null,"source":{"id":"https://openalex.org/S10892749","display_name":"IEEE Transactions on Instrumentation and Measurement","issn_l":"0018-9456","issn":["0018-9456","1557-9662"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Instrumentation and Measurement","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1588286438","display_name":null,"funder_award_id":"2008085QF325","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"},{"id":"https://openalex.org/G4031136159","display_name":null,"funder_award_id":"2008085QF314","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"},{"id":"https://openalex.org/G6650910732","display_name":null,"funder_award_id":"zk19-15","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"}],"funders":[{"id":"https://openalex.org/F4320334897","display_name":"Natural Science Foundation of Anhui Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2251512949","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2568262903","https://openalex.org/W2606473278","https://openalex.org/W2616247523","https://openalex.org/W2770129969","https://openalex.org/W2779827764","https://openalex.org/W2788810331","https://openalex.org/W2884561390","https://openalex.org/W2896457183","https://openalex.org/W2904910963","https://openalex.org/W2908510526","https://openalex.org/W2946086442","https://openalex.org/W2952524542","https://openalex.org/W2956932426","https://openalex.org/W2962529143","https://openalex.org/W2962764817","https://openalex.org/W2963109634","https://openalex.org/W2963783181","https://openalex.org/W2963876163","https://openalex.org/W2964022527","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2986755220","https://openalex.org/W2986803748","https://openalex.org/W2987734933","https://openalex.org/W2995277480","https://openalex.org/W2997591391","https://openalex.org/W3034772468","https://openalex.org/W3083600713","https://openalex.org/W3089758964","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3096609285","https://openalex.org/W3102564565","https://openalex.org/W3107094551","https://openalex.org/W3110435696","https://openalex.org/W3126337491","https://openalex.org/W3138516171","https://openalex.org/W3163747765","https://openalex.org/W3166396011","https://openalex.org/W3181159501","https://openalex.org/W3206072662","https://openalex.org/W3207127495","https://openalex.org/W3211937532","https://openalex.org/W3213454282","https://openalex.org/W3214586131","https://openalex.org/W4214490042","https://openalex.org/W4229042118","https://openalex.org/W4285025661","https://openalex.org/W4285145167","https://openalex.org/W4289126595","https://openalex.org/W4306353129","https://openalex.org/W4309181071","https://openalex.org/W4312956471","https://openalex.org/W4313124918","https://openalex.org/W4313145013","https://openalex.org/W4323663038","https://openalex.org/W4366386371","https://openalex.org/W4366668491","https://openalex.org/W4384820618","https://openalex.org/W4385245566","https://openalex.org/W6639102338","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6757135208","https://openalex.org/W6757459251","https://openalex.org/W6757817989","https://openalex.org/W6760424586","https://openalex.org/W6768263079","https://openalex.org/W6778485988","https://openalex.org/W6780226713","https://openalex.org/W6781547866","https://openalex.org/W6787022187","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6802517928","https://openalex.org/W6804416512","https://openalex.org/W6811072154","https://openalex.org/W6838787608"],"related_works":["https://openalex.org/W57206970","https://openalex.org/W3196191855","https://openalex.org/W1950785758","https://openalex.org/W2498253392","https://openalex.org/W4235091896","https://openalex.org/W2023896637","https://openalex.org/W2185981755","https://openalex.org/W4235248315","https://openalex.org/W2761617891","https://openalex.org/W2741631785"],"abstract_inverted_index":{"This":[0],"paper":[1],"tackles":[2],"the":[3,15,30,40,54,70,76,122,139,147,154,159,163,181,187],"challenging":[4],"yet":[5],"significant":[6],"task":[7],"of":[8],"grounding":[9,26,85],"a":[10,80,103,109],"natural":[11],"language":[12,41],"query":[13],"to":[14,28,98,115,128,137],"corresponding":[16],"region":[17],"onto":[18],"an":[19,50],"image.":[20],"The":[21],"main":[22],"challenge":[23],"in":[24,53,102],"visual":[25,33,61,84],"is":[27,49,126],"model":[29,136,151],"correspondence":[31],"between":[32],"context":[34],"and":[35,62,82,92,131,152,176],"semantic":[36,72],"concept":[37],"referred":[38],"by":[39,108],"expression,":[42],"that":[43,180],"is,":[44],"multi-modal":[45,90,104],"fusion.":[46],"Nevertheless,":[47],"there":[48],"inherent":[51],"deficiency":[52],"current":[55],"fusion":[56],"module":[57],"designs,":[58],"which":[59],"makes":[60],"linguistic":[63],"feature":[64],"embeddings":[65],"cannot":[66],"be":[67],"unified":[68,118],"into":[69],"same":[71],"space.":[73],"To":[74],"address":[75],"issue,":[77],"we":[78,96,157],"present":[79],"novel":[81],"effective":[83],"framework":[86],"based":[87],"on":[88,170],"joint":[89,119],"representation":[91],"interaction":[93],"(JMRI).":[94],"Specifically,":[95],"propose":[97],"perform":[99],"image-text":[100],"alignment":[101],"embedding":[105],"space":[106],"learned":[107],"large-scale":[110],"foundation":[111,150],"model,":[112],"so":[113],"as":[114],"obtain":[116],"semantically":[117],"representations.":[120],"Furthermore,":[121],"transformer-based":[123],"deep":[124],"interactor":[125],"designed":[127],"capture":[129],"intra-modal":[130],"inter-modal":[132],"correlations,":[133],"rendering":[134],"our":[135],"highlight":[138],"localization-relevant":[140],"cues":[141],"for":[142],"accurate":[143],"reasoning.":[144],"By":[145],"freezing":[146],"pre-trained":[148],"vision-language":[149],"updating":[153],"other":[155],"modules,":[156],"achieve":[158],"best":[160],"performance":[161],"with":[162,174],"lowest":[164],"training":[165],"cost.":[166],"Extensive":[167],"experimental":[168],"results":[169],"five":[171],"benchmark":[172],"datasets":[173],"quantitative":[175],"qualitative":[177],"analysis":[178],"show":[179],"proposed":[182],"method":[183],"performs":[184],"favorably":[185],"against":[186],"state-of-the-arts.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
