{"id":"https://openalex.org/W7160864713","doi":"https://doi.org/10.1109/lra.2026.3692096","title":"GIL-3D: U-Shaped Diffusion Transformers for Generalizable 3D Imitation Learning","display_name":"GIL-3D: U-Shaped Diffusion Transformers for Generalizable 3D Imitation Learning","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7160864713","doi":"https://doi.org/10.1109/lra.2026.3692096"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3692096","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3692096","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135862901","display_name":"Xiyue Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiyue Wang","raw_affiliation_strings":["College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-5455-7987","affiliations":[{"raw_affiliation_string":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105820351","display_name":"Aoran Mei","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aoran Mei","raw_affiliation_strings":["College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-6737-8983","affiliations":[{"raw_affiliation_string":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135878510","display_name":"Linzhi Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linzhi Wu","raw_affiliation_strings":["College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135845194","display_name":"Zhongxue Gan","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongxue Gan","raw_affiliation_strings":["College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-1365-396X","affiliations":[{"raw_affiliation_string":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101366363","display_name":"Guo-Niu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo-Niu Zhu","raw_affiliation_strings":["College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-2421-740X","affiliations":[{"raw_affiliation_string":"College of Intelligent Robotics and Advanced Manufacturing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61113871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"7","first_page":"7932","last_page":"7939"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.12809999287128448,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.12809999287128448,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11469999700784683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.10859999805688858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.39649999141693115},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.27410000562667847},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.26179999113082886},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.21860000491142273},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.21850000321865082}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5788000226020813},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5339999794960022},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.39649999141693115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.23880000412464142},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.21860000491142273},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.21850000321865082},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.21809999644756317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3692096","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3692096","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Imitation":[0],"learning":[1,28],"with":[2,73],"3D":[3,26],"vision":[4],"effectively":[5],"alleviates":[6],"the":[7,36,66,74,120],"impact":[8],"of":[9,45,70,79],"variations":[10],"in":[11,102,142,167],"lighting,":[12],"background,":[13],"and":[14,48,52,94,118,125,148],"texture.":[15],"It":[16],"exhibits":[17],"superior":[18],"robustness":[19],"compared":[20],"to":[21,42,99,114],"2D-based":[22],"methods.":[23],"However,":[24],"existing":[25,136],"imitation":[27],"methods":[29],"often":[30],"suffer":[31],"from":[32],"performance":[33,166],"degradation":[34],"as":[35],"task":[37],"horizon":[38],"increases,":[39],"primarily":[40],"due":[41],"insufficient":[43],"modeling":[44,77],"temporal":[46,76,85],"dependencies":[47],"misalignment":[49],"between":[50,122],"states":[51],"actions.":[53],"To":[54,81],"address":[55],"these":[56],"challenges,":[57],"we":[58,88,107],"propose":[59],"GIL-3D,":[60],"a":[61,109,139,149],"novel":[62],"framework":[63],"that":[64,131,162],"combines":[65],"stable":[67],"training":[68],"dynamics":[69],"diffusion":[71],"models":[72],"global":[75],"capability":[78],"Transformers.":[80],"further":[82],"enhance":[83],"multi-scale":[84],"dependency":[86],"modeling,":[87],"explore":[89],"alternative":[90],"U-shaped":[91],"hierarchical":[92],"architectures":[93],"introduce":[95],"strided":[96],"skip":[97],"connections":[98],"reduce":[100],"redundancy":[101],"dense":[103],"feature":[104],"fusion.":[105],"Moreover,":[106],"present":[108],"full-sequence":[110],"joint":[111],"attention":[112],"mechanism":[113],"strengthen":[115],"cross-modal":[116],"interactions":[117],"improve":[119],"consistency":[121],"visual":[123],"perception":[124],"action":[126],"generation.":[127],"Extensive":[128],"experiments":[129],"demonstrate":[130],"our":[132],"model":[133],"consistently":[134],"outperforms":[135],"baselines,":[137],"achieving":[138],"16.1%":[140],"improvement":[141,151],"success":[143],"rate":[144],"on":[145,152],"simulated":[146],"benchmarks":[147],"15.83%":[150],"real-world":[153],"manipulation":[154],"tasks.":[155],"In":[156],"addition,":[157],"comprehensive":[158],"generalization":[159],"studies":[160],"show":[161],"GIL-3D":[163],"maintains":[164],"robust":[165],"previously":[168],"unseen":[169],"scenarios.":[170]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-05-12T00:00:00"}
