{"id":"https://openalex.org/W4416249620","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227213","title":"Dual-Domain Model for Enhanced Visual Grounding with Cross-Modal Interaction","display_name":"Dual-Domain Model for Enhanced Visual Grounding with Cross-Modal Interaction","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416249620","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227213"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057592591","display_name":"Hai Nan","orcid":"https://orcid.org/0000-0001-5114-9348"},"institutions":[{"id":"https://openalex.org/I50632499","display_name":"Chongqing University of Technology","ror":"https://ror.org/04vgbd477","country_code":"CN","type":"education","lineage":["https://openalex.org/I50632499"]},{"id":"https://openalex.org/I168337820","display_name":"Chongqing University of Science and Technology","ror":"https://ror.org/03n3v6d52","country_code":"CN","type":"education","lineage":["https://openalex.org/I168337820"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hai Nan","raw_affiliation_strings":["Chongqing University of Technology,School of Computer Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Technology,School of Computer Science and Engineering,China","institution_ids":["https://openalex.org/I168337820","https://openalex.org/I50632499"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070278460","display_name":"Chaoyue Li","orcid":"https://orcid.org/0000-0001-6752-0332"},"institutions":[{"id":"https://openalex.org/I50632499","display_name":"Chongqing University of Technology","ror":"https://ror.org/04vgbd477","country_code":"CN","type":"education","lineage":["https://openalex.org/I50632499"]},{"id":"https://openalex.org/I168337820","display_name":"Chongqing University of Science and Technology","ror":"https://ror.org/03n3v6d52","country_code":"CN","type":"education","lineage":["https://openalex.org/I168337820"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoyue Li","raw_affiliation_strings":["Chongqing University of Technology,School of Computer Science and Engineering,China"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Technology,School of Computer Science and Engineering,China","institution_ids":["https://openalex.org/I168337820","https://openalex.org/I50632499"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5057592591"],"corresponding_institution_ids":["https://openalex.org/I168337820","https://openalex.org/I50632499"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3425214,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.004900000058114529,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6334999799728394},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5023000240325928},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4309999942779541},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3930000066757202},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.37770000100135803},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.3594000041484833},{"id":"https://openalex.org/keywords/feed-forward","display_name":"Feed forward","score":0.3564999997615814},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3361999988555908}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7494000196456909},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6452999711036682},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6334999799728394},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5023000240325928},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4309999942779541},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3930000066757202},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3634999990463257},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.3594000041484833},{"id":"https://openalex.org/C38858127","wikidata":"https://www.wikidata.org/wiki/Q5441228","display_name":"Feed forward","level":2,"score":0.3564999997615814},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3361999988555908},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3231000006198883},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32010000944137573},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.31369999051094055},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322955","display_name":"Chongqing University of Technology","ror":"https://ror.org/04vgbd477"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W2251512949","https://openalex.org/W2489434015","https://openalex.org/W2606473278","https://openalex.org/W2770129969","https://openalex.org/W2904910963","https://openalex.org/W2962766617","https://openalex.org/W2962858109","https://openalex.org/W2963109634","https://openalex.org/W2963150697","https://openalex.org/W2963876163","https://openalex.org/W2964022527","https://openalex.org/W2964284374","https://openalex.org/W2971915009","https://openalex.org/W2984121207","https://openalex.org/W2987734933","https://openalex.org/W3006154882","https://openalex.org/W3034772468","https://openalex.org/W3037533539","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3163747765","https://openalex.org/W3173364567","https://openalex.org/W3174004334","https://openalex.org/W3207127495","https://openalex.org/W4210820868","https://openalex.org/W4214490042","https://openalex.org/W4214666412","https://openalex.org/W4312351586","https://openalex.org/W4323663038","https://openalex.org/W4382457391","https://openalex.org/W4382458695","https://openalex.org/W4383108296","https://openalex.org/W4384820618","https://openalex.org/W4386047745","https://openalex.org/W4387272106","https://openalex.org/W4387623721","https://openalex.org/W4393148430","https://openalex.org/W4403390176","https://openalex.org/W4403998616"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"grounding":[1,135],"relies":[2],"on":[3,104,131],"reasoning":[4],"between":[5],"visual":[6,74,109,134],"and":[7,20,43,46,68,75,110,126],"language":[8,76,111],"modalities.":[9,112],"Existing":[10],"multimodal":[11],"interaction":[12],"methods":[13],"struggle":[14],"to":[15,37,98],"handle":[16],"complex":[17],"cross-modal":[18],"relationships":[19],"perform":[21],"poorly":[22],"in":[23,88],"dynamic":[24],"scenes.":[25],"Most":[26],"paradigms":[27],"are":[28],"constrained":[29],"by":[30],"single":[31],"spatial-domain":[32,86],"attention,":[33],"making":[34],"it":[35],"difficult":[36],"capture":[38],"global":[39,47],"context,":[40],"long-range":[41],"dependencies,":[42],"balance":[44],"local":[45],"features.":[48],"To":[49],"address":[50],"these":[51],"challenges,":[52],"we":[53,114],"propose":[54,115],"the":[55,79,83,105,108,116,138],"Harmonized":[56],"Spectrum-Gaussian":[57],"Adaptive":[58],"Attention":[59],"Mechanism":[60],"(HSGAM),":[61],"a":[62,121],"novel":[63],"mechanism":[64],"that":[65],"combines":[66],"frequency-domain":[67],"Gaussian":[69,95],"adaptive":[70,96],"modulation.":[71],"HSGAM":[72],"transforms":[73],"features":[77],"into":[78],"frequency":[80],"domain,":[81],"overcoming":[82],"limitations":[84],"of":[85,107,140],"self-attention":[87],"capturing":[89],"long-distance":[90],"dependencies.":[91],"It":[92],"also":[93],"introduces":[94],"modulation":[97],"dynamically":[99],"adjust":[100],"feature":[101],"interactions":[102],"based":[103],"characteristics":[106],"Additionally,":[113],"Refinative":[117],"Discriminative":[118],"Frequency":[119],"Network,":[120],"feedforward":[122],"network":[123],"incorporating":[124],"enhancement-mitigation":[125],"gating":[127],"mechanisms.":[128],"Extensive":[129],"experiments":[130],"five":[132],"benchmark":[133],"tasks":[136],"illustrate":[137],"superiority":[139],"our":[141],"network.":[142]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
