{"id":"https://openalex.org/W4413947081","doi":"https://doi.org/10.1109/icra55743.2025.11128851","title":"SCA3D: Enhancing Cross-Modal 3D Retrieval via 3D Shape and Caption Paired Data Augmentation","display_name":"SCA3D: Enhancing Cross-Modal 3D Retrieval via 3D Shape and Caption Paired Data Augmentation","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413947081","doi":"https://doi.org/10.1109/icra55743.2025.11128851"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11128851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Junlong Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Junlong Ren","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China","institution_ids":["https://openalex.org/I90610280","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102921136","display_name":"Hao Wu","orcid":"https://orcid.org/0000-0002-8738-3942"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Hao Wu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China","institution_ids":["https://openalex.org/I90610280","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101862105","display_name":"Hui Xiong","orcid":"https://orcid.org/0000-0001-6805-3692"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Hui Xiong","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China","institution_ids":["https://openalex.org/I90610280","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101635618","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0001-8608-8151"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),AI Thrust,Guangzhou,China","institution_ids":["https://openalex.org/I90610280","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I889458895","https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":1.163,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82399061,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"9550","last_page":"9557"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7198317646980286},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6983602046966553},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4621022939682007},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3865911066532135},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3501704931259155},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.34038496017456055},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.08636751770973206}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7198317646980286},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6983602046966553},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4621022939682007},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3865911066532135},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3501704931259155},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34038496017456055},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.08636751770973206},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icra55743.2025.11128851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-167351","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-167351","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8721675579","display_name":null,"funder_award_id":"62406267","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1924770834","https://openalex.org/W2069870183","https://openalex.org/W2108598243","https://openalex.org/W2961368225","https://openalex.org/W2963048248","https://openalex.org/W2964078384","https://openalex.org/W2984008963","https://openalex.org/W2990138404","https://openalex.org/W3215926197","https://openalex.org/W4236965008","https://openalex.org/W4247726808","https://openalex.org/W4293363567","https://openalex.org/W4311642023","https://openalex.org/W4312818263","https://openalex.org/W4366330503","https://openalex.org/W4383108296","https://openalex.org/W4386071466","https://openalex.org/W4386072101","https://openalex.org/W4390872570","https://openalex.org/W4394625706","https://openalex.org/W4394671432","https://openalex.org/W4402716131","https://openalex.org/W4402727730","https://openalex.org/W4402733575","https://openalex.org/W4402979956","https://openalex.org/W4405786528","https://openalex.org/W4413156118","https://openalex.org/W4413917033"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0],"cross-modal":[1,56,100,205,214],"3D":[2,14,29,49,57,72,91,101,120,153,192,225],"retrieval":[3,58,221],"task":[4],"aims":[5],"to":[6,20,109,146,171,188,246,255],"achieve":[7],"mutual":[8],"matching":[9,215],"between":[10,24,222],"text":[11,167,181],"descriptions":[12],"and":[13,27,37,46,93,143,162,178,194,212,224,248],"shapes.":[15,226],"This":[16],"has":[17],"the":[18,22,28,33,44,52,68,106,123,128,157,173,198,236,240,249],"potential":[19],"enhance":[21,213],"interaction":[23],"natural":[25],"language":[26],"environment,":[30],"especially":[31],"within":[32,122],"realms":[34],"of":[35,48,54,71,118,130,175],"robotics":[36],"embodied":[38],"artificial":[39],"intelligence":[40],"(AI)":[41],"applications.":[42],"However,":[43],"scarcity":[45],"expensiveness":[47],"data":[50,96],"constrain":[51],"performance":[53],"existing":[55],"methods.":[59],"These":[60],"methods":[61],"heavily":[62],"rely":[63],"on":[64,197,235],"features":[65],"derived":[66],"from":[67,244,253],"limited":[69],"number":[70],"shapes,":[73],"resulting":[74],"in":[75,261],"poor":[76],"generalization":[77],"ability":[78],"across":[79],"diverse":[80],"scenarios.":[81],"To":[82],"address":[83],"this":[84],"challenge,":[85],"we":[86,184],"introduce":[87],"SCA3D,":[88],"a":[89,111,151],"novel":[90],"shape":[92,121],"caption":[94],"online":[95],"augmentation":[97],"method":[98],"for":[99,191],"retrieval.":[102],"Our":[103],"approach":[104],"uses":[105],"LLaVA":[107],"model":[108],"create":[110],"component":[112,177],"library,":[113],"captioning":[114],"each":[115,176],"segmented":[116],"part":[117],"every":[119],"dataset.":[124,200],"Notably,":[125],"it":[126],"facilitates":[127],"generation":[129],"extensive":[131],"new":[132,136,152,180],"3D-text":[133],"pairs":[134],"containing":[135],"semantic":[137],"features.":[138],"We":[139,201],"employ":[140],"both":[141],"inter":[142],"intra":[144],"distances":[145],"align":[147],"various":[148],"components":[149,158],"into":[150],"shape,":[154],"ensuring":[155],"that":[156],"do":[159],"not":[160],"overlap":[161],"are":[163,169],"closely":[164],"fitted.":[165],"Further,":[166],"templates":[168],"utilized":[170],"process":[172],"captions":[174],"generate":[179],"descriptions.":[182],"Besides,":[183],"use":[185],"unimodal":[186],"encoders":[187],"extract":[189],"embeddings":[190],"shapes":[193],"texts":[195,223],"based":[196],"enriched":[199],"then":[202],"calculate":[203],"fine-grained":[204],"similarity":[206],"using":[207],"Earth":[208],"Mover's":[209],"Distance":[210],"(EMD)":[211],"with":[216],"contrastive":[217],"learning,":[218],"enabling":[219],"bidirectional":[220],"Extensive":[227],"experiments":[228],"show":[229],"our":[230],"SCA3D":[231],"outperforms":[232],"previous":[233],"works":[234],"Text2Shape":[237],"dataset,":[238],"raising":[239],"Shape-to-Text":[241],"RR@1":[242,251],"score":[243,252],"20.03":[245],"27.22":[247],"Text-to-Shape":[250],"13.12":[254],"16.67.":[256],"Codes":[257],"can":[258],"be":[259],"found":[260],"https://github.com/3DAgentWorld/SCA3D.":[262]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
