{"id":"https://openalex.org/W4392903351","doi":"https://doi.org/10.1109/icassp48485.2024.10448127","title":"Learning Fine-Grained Information Alignment for Calibrated Cross-Modal Retrieval","display_name":"Learning Fine-Grained Information Alignment for Calibrated Cross-Modal Retrieval","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903351","doi":"https://doi.org/10.1109/icassp48485.2024.10448127"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448127","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10448127","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069028140","display_name":"Jianhua Dong","orcid":"https://orcid.org/0000-0002-6532-3762"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhua Dong","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I4210142748","https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054359402","display_name":"Shengrong Zhao","orcid":"https://orcid.org/0000-0003-0965-0918"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengrong Zhao","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I4210142748","https://openalex.org/I152269853"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063693569","display_name":"Liang Hu","orcid":"https://orcid.org/0000-0001-8232-2756"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hu Liang","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Shandong Engineering Research Center of Big Data Applied Technology, Faculty of Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I4210142748","https://openalex.org/I152269853"]},{"raw_affiliation_string":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5069028140"],"corresponding_institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02671832,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":null,"first_page":"8286","last_page":"8290"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8443193435668945},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6673113703727722},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6199286580085754},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5699284076690674},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5648018717765808},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.556082010269165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5129634737968445},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4710129499435425},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.45901504158973694},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4497886300086975},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3378795385360718},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32171428203582764},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.26000380516052246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8443193435668945},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6673113703727722},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6199286580085754},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5699284076690674},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5648018717765808},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.556082010269165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5129634737968445},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4710129499435425},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.45901504158973694},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4497886300086975},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3378795385360718},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32171428203582764},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26000380516052246},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448127","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10448127","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5899999737739563}],"awards":[],"funders":[{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null},{"id":"https://openalex.org/F4320328720","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608"},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W3035524453","https://openalex.org/W3091588028","https://openalex.org/W3176013197","https://openalex.org/W4225323055","https://openalex.org/W4229042118","https://openalex.org/W4283821388","https://openalex.org/W4300979858","https://openalex.org/W4312877428","https://openalex.org/W4313178921","https://openalex.org/W4361865337","https://openalex.org/W4386065353","https://openalex.org/W4386072101","https://openalex.org/W4390873321","https://openalex.org/W6755207826","https://openalex.org/W6784333009","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6810334672","https://openalex.org/W6811072154","https://openalex.org/W6839196789","https://openalex.org/W6844993126","https://openalex.org/W6845354634","https://openalex.org/W6851076400"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W3081694532","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W1969211203","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W1517958729","https://openalex.org/W2561132942","https://openalex.org/W2092272653"],"abstract_inverted_index":{"Masked":[0],"Language":[1],"Modeling":[2],"(MLM)":[3],"and":[4,21,149],"Image-Text":[5],"Matching":[6],"(ITM)":[7],"are":[8],"always":[9],"used":[10],"in":[11,65,69,87,99,110,146,162],"fusion":[12],"encoder":[13],"to":[14,32,49,63,74,141],"learn":[15],"the":[16,26,33,39,43,57,93,102,106,115,118,133,139,157,160],"joint":[17],"representation":[18],"of":[19,29,35,46,59,108,159],"images":[20,111],"text.":[22],"In":[23,72],"existing":[24],"methods,":[25],"masking":[27,95],"strategy":[28,45,96,124],"MLM":[30],"leads":[31,62],"neglect":[34],"image":[36],"details":[37,107],"during":[38,112],"modeling":[40],"process.":[41],"Meanwhile,":[42],"sampling":[44],"ITM":[47,130],"struggles":[48],"consistently":[50],"select":[51],"high-difficulty":[52],"hard":[53,120],"negative":[54,121,127],"instances,":[55],"reducing":[56],"effectiveness":[58,158],"constraints.":[60],"This":[61,137],"challenges":[64],"aligning":[66],"fine-grained":[67,78],"information":[68,79],"cross-modal":[70,152,163],"retrieval.":[71,153],"response":[73],"this":[75,88],"challenge,":[76],"a":[77],"alignment-based":[80],"visual":[81],"language":[82],"model":[83,103,140,161],"(FAM)":[84],"is":[85,97],"proposed":[86],"paper.":[89],"On":[90,114],"one":[91],"hand,":[92,117],"attribute-based":[94],"employed":[98],"MLM,":[100],"helping":[101],"focus":[104],"on":[105],"objects":[109,145],"modeling.":[113],"other":[116],"robust":[119],"sample":[122],"generation":[123],"provides":[125],"challenging":[126],"samples":[128],"for":[129],"by":[131],"altering":[132],"relationships":[134,143],"between":[135,144],"objects.":[136],"enables":[138],"align":[142],"different":[147],"modalities":[148],"thus":[150],"calibrates":[151],"Extensive":[154],"experiments":[155],"demonstrate":[156],"retrieval":[164],"tasks.":[165]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
