{"id":"https://openalex.org/W4399423034","doi":"https://doi.org/10.1145/3652583.3658068","title":"Dynamic Soft Labeling for Visual Semantic Embedding","display_name":"Dynamic Soft Labeling for Visual Semantic Embedding","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399423034","doi":"https://doi.org/10.1145/3652583.3658068"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658068","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050610245","display_name":"Jiaao Yu","orcid":"https://orcid.org/0009-0008-2243-5933"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiaao Yu","raw_affiliation_strings":["College of Computer Science and Technology, Ocean University of China, Qingdao, China"],"raw_orcid":"https://orcid.org/0009-0008-2243-5933","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044692080","display_name":"Yunlai Ding","orcid":"https://orcid.org/0009-0000-7253-7614"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunlai Ding","raw_affiliation_strings":["College of Computer Science and Technology, Ocean University of China, Qingdao, China"],"raw_orcid":"https://orcid.org/0009-0000-7253-7614","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029633264","display_name":"Junyu Dong","orcid":"https://orcid.org/0000-0001-7012-2087"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyu Dong","raw_affiliation_strings":["College of Computer Science and Technology, Ocean University of China &amp; Sanya Oceanographic Institution, Ocean University of China, Qingdao, China"],"raw_orcid":"https://orcid.org/0000-0001-7012-2087","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ocean University of China &amp; Sanya Oceanographic Institution, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052012464","display_name":"Yuezun Li","orcid":"https://orcid.org/0000-0001-9299-1945"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuezun Li","raw_affiliation_strings":["College of Computer Science and Technology, Ocean University of China, Qingdao, China"],"raw_orcid":"https://orcid.org/0000-0001-9299-1945","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050610245"],"corresponding_institution_ids":["https://openalex.org/I59028903"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06999512,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"220","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7612890005111694},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6921646595001221},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.60403972864151},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5005247592926025},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.48313257098197937},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.46512529253959656},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.4466078579425812},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.44177061319351196},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.4367039203643799},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.43273115158081055},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36164408922195435},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.11948680877685547}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7612890005111694},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6921646595001221},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.60403972864151},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5005247592926025},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.48313257098197937},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.46512529253959656},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.4466078579425812},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.44177061319351196},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.4367039203643799},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.43273115158081055},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36164408922195435},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.11948680877685547},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3658068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658068","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399423034.pdf"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W2185175083","https://openalex.org/W2277195237","https://openalex.org/W2745461083","https://openalex.org/W2962964995","https://openalex.org/W2963467339","https://openalex.org/W2964187781","https://openalex.org/W2964303913","https://openalex.org/W2981586349","https://openalex.org/W2988823324","https://openalex.org/W2991118492","https://openalex.org/W2997403743","https://openalex.org/W3034655362","https://openalex.org/W3035454331","https://openalex.org/W3035605030","https://openalex.org/W3092820619","https://openalex.org/W3108373531","https://openalex.org/W3145807616","https://openalex.org/W3175888430","https://openalex.org/W4210894218","https://openalex.org/W4213087664","https://openalex.org/W4292258164","https://openalex.org/W4313030099","https://openalex.org/W4386071498"],"related_works":["https://openalex.org/W2165912799","https://openalex.org/W2735662278","https://openalex.org/W2382615723","https://openalex.org/W4311804456","https://openalex.org/W1987484445","https://openalex.org/W2623658258","https://openalex.org/W2143413548","https://openalex.org/W1969219540","https://openalex.org/W2370459448","https://openalex.org/W2105067402"],"abstract_inverted_index":{"Visual":[0],"Semantic":[1],"Embedding":[2],"(VSE)":[3],"is":[4],"a":[5,14,37,89,178],"prominent":[6],"approach":[7],"in":[8,177,184],"image-text":[9],"retrieval,":[10],"aiming":[11],"to":[12,57,78,150,173],"learn":[13],"deep":[15],"embedding":[16],"space":[17],"that":[18,93],"aligns":[19],"visual":[20],"data":[21],"with":[22,41,71],"semantic":[23,48,73,171],"text":[24],"labels.":[25],"However,":[26],"current":[27],"VSE":[28],"methods":[29],"oversimplify":[30],"the":[31,47,59,79,103,117,128,135,144,152,174,185,195,205,215],"retrieval":[32],"task,":[33],"treating":[34],"it":[35],"as":[36],"binary":[38],"classification":[39],"problem":[40],"triplet":[42],"loss":[43],"constraints.":[44],"This":[45,101],"ignores":[46],"correlation":[49,104],"between":[50,62,105,154,211],"pairs":[51,110,213],"of":[52,137,161,181,197,207,217],"mismatched":[53],"samples":[54,70,163],"and":[55,107,111,156,191,214],"fails":[56],"capture":[58],"similarity":[60],"gradient":[61],"samples.":[63,158],"In":[64,140],"addition,":[65,141],"hard":[66],"constraints":[67],"on":[68,169,189,223],"negative":[69,108,132,157,162,219],"high":[72],"relevance":[74,172],"can":[75,164],"be":[76,165],"detrimental":[77],"model's":[80],"representational":[81],"capabilities.":[82],"To":[83],"address":[84],"these":[85],"limitations,":[86],"we":[87,142],"propose":[88,143],"novel":[90],"training":[91],"strategy":[92],"introduces":[94],"dynamic":[95,199],"soft":[96,200],"labels":[97],"without":[98],"additional":[99],"annotations.":[100,225],"captures":[102],"positive":[106,155],"sample":[109,182,212,220],"guides":[112],"feature":[113],"representation":[114,136],"learning":[115],"using":[116],"Soft":[118],"Negative":[119,146],"Alignment":[120],"Loss":[121,148],"(SNAL).":[122],"SNAL":[123],"fully":[124],"takes":[125],"into":[126],"account":[127],"influence":[129],"by":[130],"similar":[131],"samples,":[133],"enhancing":[134],"cross-modal":[138],"data.":[139],"Stepwise":[145,159],"Decoupling":[147],"(SNDL)":[149],"increase":[151],"distance":[153],"decoupling":[160],"adaptively":[166],"distanced":[167],"based":[168,222],"their":[170],"anchor,":[175],"resulting":[176],"wider":[179],"distribution":[180],"features":[183],"common":[186],"space.":[187],"Experiments":[188],"Flickr30K":[190],"MS-COCO":[192],"datasets":[193],"validate":[194],"effectiveness":[196],"our":[198],"labeling":[201],"(DSL)":[202],"methods,":[203],"demonstrating":[204],"importance":[206],"considering":[208],"complex":[209],"relationships":[210],"limitations":[216],"rigid":[218],"categorization":[221],"subjective":[224]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
