{"id":"https://openalex.org/W4416250393","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227351","title":"AMNET: Attention-Base Multiscale Network for Image-Text Matching","display_name":"AMNET: Attention-Base Multiscale Network for Image-Text Matching","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416250393","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227351"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227351","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227351","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072113100","display_name":"Josephine Hai","orcid":"https://orcid.org/0000-0002-5842-7686"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiabao Hai","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071338243","display_name":"Qing Yu","orcid":"https://orcid.org/0000-0003-2513-2969"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Yu","raw_affiliation_strings":["XinJiang University,School of Information Science and Engineering,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"XinJiang University,School of Information Science and Engineering,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011440708","display_name":"Ye-Hua Gan","orcid":"https://orcid.org/0000-0001-7893-5787"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yutong Gan","raw_affiliation_strings":["XinJiang University,School of Software,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"XinJiang University,School of Software,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100375209","display_name":"Mei Wang","orcid":"https://orcid.org/0000-0002-8136-7562"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingshuo Wang","raw_affiliation_strings":["XinJiang University,School of Software,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"XinJiang University,School of Software,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046875545","display_name":"Yuhui Zhou","orcid":"https://orcid.org/0009-0002-5343-8084"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhui Zhou","raw_affiliation_strings":["XinJiang University,School of Software,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"XinJiang University,School of Software,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5072113100"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37309942,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8687000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8687000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.03929999843239784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.03819999843835831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.613099992275238},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5698999762535095},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5619999766349792},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5479000210762024},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5343000292778015},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5144000053405762},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4627000093460083},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.44130000472068787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8342000246047974},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6909999847412109},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.613099992275238},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5698999762535095},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5619999766349792},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5343000292778015},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5144000053405762},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4627000093460083},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.44130000472068787},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3961000144481659},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.38989999890327454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36640000343322754},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35370001196861267},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.3109000027179718},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27230000495910645},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2709999978542328},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.25189998745918274},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227351","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227351","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2745461083","https://openalex.org/W2757526371","https://openalex.org/W2962964995","https://openalex.org/W2963794516","https://openalex.org/W2965848243","https://openalex.org/W2997945091","https://openalex.org/W3008356057","https://openalex.org/W3035605030","https://openalex.org/W3092820619","https://openalex.org/W3175888430","https://openalex.org/W3206675006","https://openalex.org/W3206723373","https://openalex.org/W3214042654","https://openalex.org/W4210894218","https://openalex.org/W4289654392","https://openalex.org/W4297630623","https://openalex.org/W4312761738","https://openalex.org/W4315643841","https://openalex.org/W4321607993","https://openalex.org/W4385934203","https://openalex.org/W4386071498","https://openalex.org/W4386071757","https://openalex.org/W4387130422","https://openalex.org/W4389474309","https://openalex.org/W4391305635","https://openalex.org/W4403420955"],"related_works":[],"abstract_inverted_index":{"The":[0,183],"domain":[1],"of":[2,12,22,43,49,69,73,89,107,133,147,160,172],"image-text":[3,90],"matching":[4],"has":[5],"garnered":[6],"considerable":[7],"interest":[8],"within":[9],"the":[10,26,41,47,52,67,87,97,105,127,131,134,156,161,165],"spheres":[11],"computer":[13],"vision":[14],"and":[15,30,46,54,101,113,177],"multimodal":[16],"interaction,":[17],"marking":[18],"a":[19,117,138,144,168],"pivotal":[20],"area":[21],"study":[23,77],"that":[24],"bridges":[25],"gap":[27],"between":[28,51,99],"visual":[29,53,100],"textual":[31,55,102],"modalities.":[32],"This":[33,92],"cross-modal":[34],"endeavor":[35],"primarily":[36],"encompasses":[37],"two":[38],"fundamental":[39],"challenges:":[40],"extraction":[42,68,141,158],"representative":[44],"features":[45],"quantification":[48],"similarity":[50],"entities.":[56],"Predominantly,":[57],"existing":[58],"methodologies":[59],"adhere":[60],"to":[61],"monolithic,":[62],"single-scale":[63],"neural":[64],"architectures":[65],"for":[66,86,180,185],"features.":[70],"In":[71],"light":[72],"these":[74],"limitations,":[75],"this":[76,186],"introduces":[78],"an":[79],"attention-base":[80],"multiscale":[81,118],"network":[82],"(AMNET)":[83],"specifically":[84],"designed":[85],"task":[88],"matching.":[91],"novel":[93],"framework":[94],"meticulously":[95],"leverages":[96],"synergies":[98],"elements":[103],"through":[104],"amalgamation":[106],"intermediate":[108],"outputs":[109],"derived":[110],"from":[111],"BERT":[112],"Vision":[114],"Transformer,":[115],"facilitating":[116],"fusion":[119],"approach.":[120],"Rigor":[121],"ours":[122],"empirical":[123],"evaluations":[124],"conducted":[125],"on":[126],"Flickr30K":[128],"dataset":[129],"substantiate":[130],"efficacy":[132],"proposed":[135],"model":[136,166,187],"as":[137],"versatile":[139],"feature":[140,157],"module,":[142],"registering":[143],"noteworthy":[145],"enhancement":[146],"18%":[148],"in":[149],"rSum":[150],"evaluation":[151],"metrics,":[152],"solely":[153],"by":[154],"refining":[155],"process":[159],"pre-existing":[162],"models.":[163],"Notably,":[164],"demonstrates":[167],"significant":[169],"average":[170],"improvement":[171],"5.77%":[173],"across":[174],"R@1,":[175],"R@5,":[176],"R@10":[178],"metrics":[179],"Text-to-Image":[181],"tasks.":[182],"code":[184],"is":[188],"publicly":[189],"accessible":[190],"at":[191],"https://github.com/WMSlalalala/AMS.":[192]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
