{"id":"https://openalex.org/W4416750651","doi":"https://doi.org/10.1109/iros60139.2025.11245943","title":"Resource-Efficient Affordance Grounding with Complementary Depth and Semantic Prompts","display_name":"Resource-Efficient Affordance Grounding with Complementary Depth and Semantic Prompts","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750651","doi":"https://doi.org/10.1109/iros60139.2025.11245943"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11245943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002505088","display_name":"Yuan Huang","orcid":"https://orcid.org/0000-0002-9538-9746"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Yizhou Huang","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028825882","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0003-3407-8183"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067289855","display_name":"Guoliang Zhu","orcid":"https://orcid.org/0000-0001-6728-8641"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Guoliang Zhu","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049263129","display_name":"Gen Li","orcid":"https://orcid.org/0000-0003-2358-7505"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gen Li","raw_affiliation_strings":["University of Edinburgh,School of Informatics,UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,School of Informatics,UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100627106","display_name":"Hao Shi","orcid":"https://orcid.org/0000-0003-0184-2245"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Shi","raw_affiliation_strings":["Zhejiang University,State Key Laboratory of Extreme Photonics and Instrumentation,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,State Key Laboratory of Extreme Photonics and Instrumentation,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101486181","display_name":"Yue Zuo","orcid":"https://orcid.org/0000-0001-5791-3634"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Yukun Zuo","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106498485","display_name":"Wenrui Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Wenrui Chen","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396719","display_name":"Zhiyong Li","orcid":"https://orcid.org/0000-0001-9307-8453"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Zhiyong Li","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027010844","display_name":"Kailun Yang","orcid":"https://orcid.org/0000-0002-1090-667X"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kailun Yang","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics and the National Engineering Research Center of Robot Visual Perception and Control Technology,China","institution_ids":["https://openalex.org/I4210121405"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5002505088"],"corresponding_institution_ids":["https://openalex.org/I4210121405"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37148719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7788","last_page":"7795"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.30320000648498535,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.30320000648498535,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2888999879360199,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07410000264644623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/affordance","display_name":"Affordance","score":0.8925999999046326},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.633400022983551},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6096000075340271},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5504000186920166},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4702000021934509},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.46889999508857727},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.44999998807907104},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42289999127388},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4154999852180481},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3984000086784363}],"concepts":[{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.8925999999046326},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7720000147819519},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.633400022983551},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6096000075340271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5849000215530396},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5504000186920166},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.46889999508857727},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4553999900817871},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.44999998807907104},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42289999127388},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4154999852180481},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.3734000027179718},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.3643999993801117},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.35019999742507935},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.336899995803833},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C2781089630","wikidata":"https://www.wikidata.org/wiki/Q21856745","display_name":"Realization (probability)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29100000858306885},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.27799999713897705},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26109999418258667},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11245943","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326952","display_name":"State Key Laboratory of Industrial Control Technology","ror":null},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1524405667","https://openalex.org/W2108598243","https://openalex.org/W2146055337","https://openalex.org/W2783693550","https://openalex.org/W2914996824","https://openalex.org/W2962784289","https://openalex.org/W2963048817","https://openalex.org/W2963635127","https://openalex.org/W2983465317","https://openalex.org/W3159481202","https://openalex.org/W3168279596","https://openalex.org/W3187864438","https://openalex.org/W4221153063","https://openalex.org/W4377000489","https://openalex.org/W4386065393","https://openalex.org/W4386065846","https://openalex.org/W4386071529","https://openalex.org/W4386275552","https://openalex.org/W4388266798","https://openalex.org/W4388505297","https://openalex.org/W4393156366","https://openalex.org/W4399422471","https://openalex.org/W4402594465","https://openalex.org/W4402716224","https://openalex.org/W4402727359","https://openalex.org/W4402753560","https://openalex.org/W4402916210","https://openalex.org/W4403488721","https://openalex.org/W4403888824","https://openalex.org/W4405785392","https://openalex.org/W4406895099","https://openalex.org/W4409370047","https://openalex.org/W4412684057","https://openalex.org/W4415798746"],"related_works":[],"abstract_inverted_index":{"Affordance":[0],"refers":[1],"to":[2,23,46,60,109],"the":[3,62,75,100,110,118,128,143,151,158,167,181,186,196],"functional":[4,135],"properties":[5],"that":[6,166],"an":[7],"agent":[8],"perceives":[9],"and":[10,15,30,53,89,132,145,177],"utilizes":[11],"from":[12],"its":[13],"environment,":[14],"is":[16,28],"key":[17],"perceptual":[18],"information":[19,27],"required":[20],"for":[21,65],"robots":[22],"perform":[24],"actions.":[25],"This":[26,126],"rich":[29],"multimodal":[31,35],"in":[32,40,150,195],"nature.":[33],"Existing":[34],"affordance":[36,78,161],"methods":[37],"face":[38],"limitations":[39],"extracting":[41],"useful":[42],"information,":[43],"mainly":[44],"due":[45],"simple":[47],"structural":[48],"designs,":[49],"basic":[50],"fusion":[51],"methods,":[52],"large":[54],"model":[55,201],"parameters,":[56],"making":[57],"it":[58,116],"difficult":[59],"meet":[61],"performance":[63,172],"requirements":[64],"practical":[66,206],"deployment.":[67],"To":[68],"address":[69],"these":[70],"issues,":[71],"this":[72],"paper":[73],"proposes":[74],"BiT-Align":[76],"image-depth-text":[77],"mapping":[79],"framework.":[80],"The":[81,139,209],"framework":[82],"includes":[83],"a":[84,90,107,192],"Bypass":[85],"Prompt":[86],"Module":[87],"(BPM)":[88],"Text":[91],"Feature":[92],"Guidance":[93],"(TFG)":[94],"attention":[95,148],"selection":[96,144],"mechanism.":[97],"BPM":[98],"integrates":[99],"auxiliary":[101],"modality":[102,112,120],"depth":[103],"image":[104,152],"directly":[105],"as":[106],"prompt":[108],"primary":[111,119],"RGB":[113],"image,":[114],"embedding":[115],"into":[117],"encoder":[121,153],"without":[122],"introducing":[123],"additional":[124],"encoders.":[125],"reduces":[127],"model\u2019s":[129],"parameter":[130],"count":[131],"effectively":[133],"improves":[134],"region":[136],"localization":[137],"accuracy.":[138],"TFG":[140],"mechanism":[141],"guides":[142],"enhancement":[146],"of":[147,160],"heads":[149],"using":[154],"textual":[155],"features,":[156],"improving":[157],"understanding":[159],"characteristics.":[162],"Experimental":[163],"results":[164],"demonstrate":[165],"proposed":[168],"method":[169],"achieves":[170],"significant":[171],"improvements":[173],"on":[174],"public":[175],"AGD20K":[176,182],"HICO-IIF":[178],"datasets.":[179],"On":[180],"dataset,":[183],"compared":[184],"with":[185],"current":[187],"state-of-the-art":[188],"method,":[189],"we":[190],"achieve":[191],"6.0%":[193],"improvement":[194],"KLD":[197],"metric,":[198],"while":[199],"reducing":[200],"parameters":[202],"by":[203],"88.8%,":[204],"demonstrating":[205],"application":[207],"values.":[208],"source":[210],"code":[211],"will":[212],"be":[213],"made":[214],"publicly":[215],"available":[216],"at":[217],"https://github.com/DAWDSE/BiT-Align.":[218]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
