{"id":"https://openalex.org/W4415708140","doi":"https://doi.org/10.1109/icme59968.2025.11210222","title":"3D-Contrastive Anchors and Structure Enhancement for Multi-modal Representations","display_name":"3D-Contrastive Anchors and Structure Enhancement for Multi-modal Representations","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708140","doi":"https://doi.org/10.1109/icme59968.2025.11210222"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11210222","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210222","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102548769","display_name":"Mingkai Sheng","orcid":"https://orcid.org/0009-0003-5881-5691"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingkai Sheng","raw_affiliation_strings":["Zhejiang Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jichao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jichao Wang","raw_affiliation_strings":["Zhejiang Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010616457","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0002-3244-3258"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["Zhejiang Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100344183","display_name":"Wen Cheng","orcid":"https://orcid.org/0000-0002-9063-1177"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Cheng","raw_affiliation_strings":["Zhejiang Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036840192","display_name":"Lingfang Zeng","orcid":"https://orcid.org/0000-0003-3130-3015"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingfang Zeng","raw_affiliation_strings":["Zhejiang Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210123185"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210123185"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26443886,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9386000037193298,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9386000037193298,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.01810000091791153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.008999999612569809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7121999859809875},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6646999716758728},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5740000009536743},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4918000102043152},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4553000032901764},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4546000063419342},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4009999930858612},{"id":"https://openalex.org/keywords/closing","display_name":"Closing (real estate)","score":0.376800000667572}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7777000069618225},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7121999859809875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6771000027656555},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6646999716758728},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5740000009536743},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4918000102043152},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4912000000476837},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4009999930858612},{"id":"https://openalex.org/C2778775528","wikidata":"https://www.wikidata.org/wiki/Q5135432","display_name":"Closing (real estate)","level":2,"score":0.376800000667572},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11210222","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210222","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null},{"id":"https://openalex.org/F4320335581","display_name":"Young Scientists Fund","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1840435438","https://openalex.org/W2886641317","https://openalex.org/W2963530300","https://openalex.org/W2970231061","https://openalex.org/W3035524453","https://openalex.org/W3035682985","https://openalex.org/W3091588028","https://openalex.org/W3129831491","https://openalex.org/W3173909648","https://openalex.org/W4385245566","https://openalex.org/W4386065512","https://openalex.org/W4386071687","https://openalex.org/W4393154896","https://openalex.org/W4402979790","https://openalex.org/W4402980261"],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"visual-language":[1],"representation":[2,24],"learning":[3,73],"has":[4],"demonstrated":[5],"its":[6],"potential":[7],"for":[8,22,75,88],"constructing":[9],"multimodal":[10,77],"models.":[11],"Current":[12],"methods":[13,29],"employ":[14],"encoders":[15],"to":[16,31,86],"learn":[17],"special":[18],"classification":[19],"tokens":[20],"([CLS])":[21],"cross-modal":[23],"and":[25,38,61,68,93,122],"alignment.":[26,78],"However,":[27],"current":[28],"fail":[30],"effectively":[32],"capture":[33],"fine-grained":[34],"attributes":[35],"in":[36,41,112],"images":[37],"structural":[39,94],"information":[40],"text,":[42],"limiting":[43],"their":[44],"performance":[45,128],"on":[46,105,114,132],"tasks":[47],"requiring":[48],"detailed":[49],"semantics.":[50],"To":[51],"address":[52],"these":[53],"issues,":[54],"we":[55,80],"propose":[56],"3DASEM":[57,100],"(3D":[58],"Constrasive":[59],"Anchors":[60],"Structured-Enhanced":[62],"Module),":[63],"which":[64],"leverages":[65],"attribute-structure":[66],"injection":[67],"a":[69,109],"novel":[70],"three-dimensional":[71],"contrastive":[72],"strategy":[74],"precise":[76],"Additionally,":[79],"constructed":[81],"an":[82],"attribute":[83,92],"dataset":[84],"designed":[85],"compensate":[87],"the":[89,117,127,137],"lack":[90],"of":[91,129,139],"information.":[95],"Experimental":[96],"results":[97],"demonstrate":[98],"that":[99],"consistently":[101],"outperforms":[102],"state-of-the-art":[103],"models":[104,130],"benchmark":[106],"datasets,":[107],"achieving":[108],"4.1%":[110],"improvement":[111],"recall@1":[113],"MSCOCO":[115],"with":[116],"same":[118],"size":[119],"training":[120],"data":[121],"closing":[123],"or":[124],"even":[125],"surpassing":[126],"trained":[131],"more":[133],"than":[134],"three":[135],"times":[136],"amount":[138],"data.":[140]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-30T00:00:00"}
