{"id":"https://openalex.org/W7138226417","doi":"https://doi.org/10.1609/aaai.v40i29.39677","title":"Continuous Vision-Language-Action Co-Learning with Semantic-Physical Alignment for Behavioral Cloning","display_name":"Continuous Vision-Language-Action Co-Learning with Semantic-Physical Alignment for Behavioral Cloning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138226417","doi":"https://doi.org/10.1609/aaai.v40i29.39677"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i29.39677","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39677","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39677/43638","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39677/43638","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129653960","display_name":"Xiuxiu Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Xiuxiu Qi","raw_affiliation_strings":["The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China\nDepartment of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China\nDepartment of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129672577","display_name":"Yu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086892","display_name":"Education University of Hong Kong","ror":"https://ror.org/000t0f062","country_code":"HK","type":"education","lineage":["https://openalex.org/I4210086892"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yu Yang","raw_affiliation_strings":["Centre for Learning, Teaching and Technology, The Education University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Centre for Learning, Teaching and Technology, The Education University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I4210086892"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129696057","display_name":"Jiannong Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiannong Cao","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129664113","display_name":"Luyao Bai","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Luyao Bai","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong SAR, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027234719","display_name":"Chongshan Fan","orcid":"https://orcid.org/0000-0001-5171-102X"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chongshan Fan","raw_affiliation_strings":["The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025144135","display_name":"Chengtai Cao","orcid":"https://orcid.org/0000-0003-3944-8358"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chengtai Cao","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129713435","display_name":"Hongpeng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongpeng Wang","raw_affiliation_strings":["The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"The College of Artificial Intelligence & Shenzhen Research Institute, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129653960"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63541667,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"29","first_page":"24900","last_page":"24908"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6439999938011169,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6439999938011169,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10019999742507935,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.06379999965429306,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.6187000274658203},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5996000170707703},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5985999703407288},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5931000113487244},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5760999917984009},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5360999703407288},{"id":"https://openalex.org/keywords/fragment","display_name":"Fragment (logic)","score":0.3750999867916107},{"id":"https://openalex.org/keywords/classification-of-discontinuities","display_name":"Classification of discontinuities","score":0.3662000000476837}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7190999984741211},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.6187000274658203},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5996000170707703},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5985999703407288},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5931000113487244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5803999900817871},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5760999917984009},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5360999703407288},{"id":"https://openalex.org/C2776235265","wikidata":"https://www.wikidata.org/wiki/Q18392052","display_name":"Fragment (logic)","level":2,"score":0.3750999867916107},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.373199999332428},{"id":"https://openalex.org/C15627037","wikidata":"https://www.wikidata.org/wiki/Q541961","display_name":"Classification of discontinuities","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3458000123500824},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3319999873638153},{"id":"https://openalex.org/C207673951","wikidata":"https://www.wikidata.org/wiki/Q1303150","display_name":"Compounding","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2669000029563904},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i29.39677","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39677","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39677/43638","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i29.39677","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39677","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39677/43638","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3187966953","display_name":null,"funder_award_id":"31100900","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4257047158","display_name":null,"funder_award_id":"202307","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5762773393","display_name":null,"funder_award_id":"202108","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5854786607","display_name":null,"funder_award_id":"Tianjin","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322598","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98"},{"id":"https://openalex.org/F4320324962","display_name":"Peking Union Medical College Hospital","ror":"https://ror.org/04jztag35"},{"id":"https://openalex.org/F4320326414","display_name":"Education University of Hong Kong","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138226417.pdf","grobid_xml":"https://content.openalex.org/works/W7138226417.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Language-Conditioned":[0],"Manipulation":[1],"(LCM)":[2],"facilitates":[3],"human-robot":[4],"interaction":[5],"via":[6],"Behavioral":[7],"Cloning":[8],"(BC),":[9],"which":[10],"learns":[11],"control":[12],"policies":[13],"from":[14,56],"human":[15],"demonstrations":[16],"and":[17,59,67,91,98,109,180],"serves":[18],"as":[19],"a":[20,33,82,125,171],"cornerstone":[21],"of":[22,139],"embodied":[23],"AI.":[24],"Overcoming":[25],"compounding":[26,43],"errors":[27,44],"in":[28,163],"sequential":[29],"action":[30,65,100,133],"decisions":[31],"remains":[32],"central":[34],"challenge":[35],"to":[36,63,121,128,159],"improving":[37],"BC":[38,84],"performance.":[39],"Existing":[40],"approaches":[41],"mitigate":[42],"through":[45,103],"data":[46],"augmentation,":[47],"expressive":[48],"representation,":[49],"or":[50],"temporal":[51],"abstraction.":[52],"However,":[53],"they":[54],"suffer":[55],"physical":[57],"discontinuities":[58],"semantic-physical":[60,140],"misalignment,":[61],"leading":[62],"inaccurate":[64],"cloning":[66],"intermittent":[68],"execution.":[69],"In":[70],"this":[71],"paper,":[72],"we":[73,117],"present":[74],"Continuous":[75],"vision-language-action":[76],"Co-Learning":[77],"with":[78,157],"Semantic-Physical":[79],"Alignment":[80],"(CCoL),":[81],"novel":[83],"framework":[85],"that":[86,145],"ensures":[87],"temporally":[88],"consistent":[89],"execution":[90,101],"fine-grained":[92],"semantic":[93],"grounding.":[94],"It":[95],"generates":[96],"robust":[97],"smooth":[99],"trajectories":[102],"continuous":[104],"co-learning":[105],"across":[106,153],"vision,":[107],"language,":[108],"proprioceptive":[110],"inputs":[111],"(i.e.,":[112],"robot":[113,173],"internal":[114],"states).":[115],"Meanwhile,":[116],"anchor":[118],"language":[119],"semantics":[120],"visuomotor":[122],"representations":[123],"by":[124],"bidirectional":[126],"cross-attention":[127],"learn":[129],"contextual":[130],"information":[131],"for":[132],"generation,":[134],"successfully":[135],"overcoming":[136],"the":[137],"problem":[138],"misalignment.":[141],"Extensive":[142],"experiments":[143],"show":[144],"CCoL":[146],"achieves":[147],"an":[148],"average":[149],"8.0%":[150],"relative":[151,161],"improvement":[152],"three":[154],"simulation":[155],"suites,":[156],"up":[158],"19.2%":[160],"gain":[162],"human-demonstrated":[164],"bimanual":[165],"insertion":[166],"tasks.":[167],"Real-world":[168],"tests":[169],"on":[170],"7-DoF":[172],"further":[174],"confirm":[175],"CCoL\u2019s":[176],"generalization":[177],"under":[178],"unseen":[179],"noisy":[181],"object":[182],"states.":[183]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
