{"id":"https://openalex.org/W7157088553","doi":"https://doi.org/10.48550/arxiv.2604.23249","title":"BridgeACT: Bridging Human Demonstrations to Robot Actions via Unified Tool-Target Affordances","display_name":"BridgeACT: Bridging Human Demonstrations to Robot Actions via Unified Tool-Target Affordances","publication_year":2026,"publication_date":"2026-04-25","ids":{"openalex":"https://openalex.org/W7157088553","doi":"https://doi.org/10.48550/arxiv.2604.23249"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.23249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.23249","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134760591","display_name":"Yifan Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Han, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101896256","display_name":"Jianxiang Liu","orcid":"https://orcid.org/0000-0002-4379-3288"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jianxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134750068","display_name":"Haoyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134752015","display_name":"Yuqi Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Yuqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125108562","display_name":"Yunhan Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yunhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134799294","display_name":"Wenzhao Lian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Wenzhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5134760591"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.838100016117096,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.838100016117096,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.04089999943971634,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.02459999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/affordance","display_name":"Affordance","score":0.9584000110626221},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.7071999907493591},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6973000168800354},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.590399980545044},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5501999855041504},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.47850000858306885},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46459999680519104},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4632999897003174},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.41449999809265137}],"concepts":[{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.9584000110626221},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.7071999907493591},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6973000168800354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6622999906539917},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.6297000050544739},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.590399980545044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5759999752044678},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5501999855041504},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.47850000858306885},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46459999680519104},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4632999897003174},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.41449999809265137},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4099999964237213},{"id":"https://openalex.org/C162947575","wikidata":"https://www.wikidata.org/wiki/Q2005645","display_name":"Social robot","level":5,"score":0.3553999960422516},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.34200000762939453},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.3310000002384186},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.31769999861717224},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C74222875","wikidata":"https://www.wikidata.org/wiki/Q16000312","display_name":"Robot kinematics","level":4,"score":0.26899999380111694},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.2630000114440918},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.2603999972343445},{"id":"https://openalex.org/C28063669","wikidata":"https://www.wikidata.org/wiki/Q7167042","display_name":"Perceptual system","level":3,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.23249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.23249","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Learning":[0],"robot":[1,23,32,71,91,136],"manipulation":[2,63,95,159,180],"from":[3,65,127],"human":[4,15,66,88,128],"videos":[5,67],"is":[6,77],"appealing":[7],"due":[8],"to":[9,21,78,101,105,135,190],"the":[10,44,117],"scale":[11],"and":[12,47,90,103,120,142,174,188,194],"diversity":[13],"of":[14,163,171],"demonstrations,":[16],"but":[17],"transferring":[18],"such":[19],"demonstrations":[20,89],"executable":[22],"behavior":[24],"remains":[25],"challenging.":[26],"Prior":[27],"work":[28],"either":[29],"relies":[30],"on":[31,151,178],"data":[33],"for":[34],"downstream":[35],"adaptation":[36],"or":[37],"learns":[38,61],"affordance":[39,80,114,164],"representations":[40],"that":[41,60,86,183],"remain":[42],"at":[43],"perception":[45],"level":[46],"do":[48],"not":[49],"directly":[50,64],"support":[51],"real-world":[52,179],"execution.":[53],"We":[54],"present":[55],"BridgeACT,":[56],"an":[57,82],"affordance-driven":[58],"framework":[59],"robotic":[62],"without":[68],"requiring":[69],"any":[70],"demonstration":[72],"data.":[73],"Our":[74],"key":[75],"idea":[76],"model":[79],"as":[81,161],"embodiment-agnostic":[83],"intermediate":[84],"representation":[85],"bridges":[87],"actions.":[92],"BridgeACT":[93,110,184],"decomposes":[94],"into":[96],"two":[97],"complementary":[98],"problems:":[99],"where":[100],"grasp":[102],"how":[104],"move.":[106],"To":[107],"this":[108],"end,":[109],"first":[111],"grounds":[112],"task-relevant":[113],"regions":[115],"in":[116],"current":[118],"scene,":[119],"then":[121],"predicts":[122],"task-conditioned":[123],"3D":[124],"motion":[125,146],"affordances":[126,132],"demonstrations.":[129],"The":[130],"resulting":[131],"are":[133],"mapped":[134],"actions":[137],"through":[138],"a":[139,143,168],"grasping":[140],"module":[141],"lightweight":[144],"closed-loop":[145],"controller,":[147],"enabling":[148],"direct":[149],"deployment":[150],"real":[152],"robots.":[153],"In":[154],"addition,":[155],"we":[156],"represent":[157],"complex":[158],"tasks":[160,173,181],"compositions":[162],"operations,":[165],"which":[166],"allows":[167],"unified":[169],"treatment":[170],"diverse":[172],"object-to-object":[175],"interactions.":[176],"Experiments":[177],"show":[182],"outperforms":[185],"prior":[186],"baselines":[187],"generalizes":[189],"unseen":[191],"objects,":[192],"scenes,":[193],"viewpoints.":[195]},"counts_by_year":[],"updated_date":"2026-04-29T06:16:36.941037","created_date":"2026-04-29T00:00:00"}
