{"id":"https://openalex.org/W7128448646","doi":"https://doi.org/10.1109/lra.2026.3662647","title":"ManiVID-3D: Generalizable View-Invariant Reinforcement Learning for Robotic Manipulation via Disentangled 3D Representations","display_name":"ManiVID-3D: Generalizable View-Invariant Reinforcement Learning for Robotic Manipulation via Disentangled 3D Representations","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128448646","doi":"https://doi.org/10.1109/lra.2026.3662647"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3662647","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3662647","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125469900","display_name":"Zheng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zheng Li","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0005-7182-444X","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125459197","display_name":"Pei Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei Qu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102635978","display_name":"Yufei Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yufei Jia","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-7590-9647","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102498460","display_name":"Shihui Zhou","orcid":"https://orcid.org/0000-0001-5251-8352"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shihui Zhou","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125438690","display_name":"Haizhou Ge","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Ge","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8388-7193","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124010280","display_name":"Jiahang Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiahang Cao","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-4338-4414","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jinni Zhou","orcid":"https://orcid.org/0000-0001-7282-4541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinni Zhou","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-7282-4541","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125468439","display_name":"Guyue Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guyue Zhou","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3894-9858","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125449750","display_name":"Jun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Ma","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-9405-8232","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5125469900"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25844863,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"4","first_page":"4235","last_page":"4242"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4027000069618225,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4027000069618225,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.24959999322891235,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.09440000355243683,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6875},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5533000230789185},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5321000218391418},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5295000076293945},{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.48179998993873596},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.43790000677108765},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.43720000982284546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7857999801635742},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6552000045776367},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5936999917030334},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5533000230789185},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5321000218391418},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5295000076293945},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.48179998993873596},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4424000084400177},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.43790000677108765},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.43720000982284546},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4090000092983246},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3425999879837036},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31130000948905945},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.30070000886917114},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C193611912","wikidata":"https://www.wikidata.org/wiki/Q4677596","display_name":"Active vision","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3662647","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3662647","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4611484110355377,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2034725503","https://openalex.org/W2158782408","https://openalex.org/W2913871909","https://openalex.org/W2975950300","https://openalex.org/W3035624836","https://openalex.org/W3131554325","https://openalex.org/W4313156423","https://openalex.org/W4319300865","https://openalex.org/W4328007461","https://openalex.org/W4400188660","https://openalex.org/W4401416679","https://openalex.org/W4402354045","https://openalex.org/W4409263028","https://openalex.org/W4409364492","https://openalex.org/W4412642683","https://openalex.org/W4412687571","https://openalex.org/W4413144615","https://openalex.org/W4413155863","https://openalex.org/W4415797227","https://openalex.org/W4416749295","https://openalex.org/W4416749759","https://openalex.org/W4416750344"],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"visual":[1,135],"reinforcement":[2],"learning":[3,185],"(RL)":[4],"policies":[5],"in":[6,33,193],"real-world":[7,34,147],"manipulation":[8,192],"is":[9,29,39],"often":[10,46],"hindered":[11],"by":[12],"camera":[13,23,28,50],"viewpoint":[14,163],"changes.":[15,57],"A":[16],"policy":[17],"trained":[18],"from":[19,98],"a":[20,65,87,102,154],"fixed":[21],"front-facing":[22],"may":[24],"fail":[25],"when":[26],"the":[27,108,182],"shifted-an":[30],"unavoidable":[31],"situation":[32],"settings":[35],"where":[36],"sensor":[37],"placement":[38],"hard":[40],"to":[41,173],"manage":[42],"appropriately.":[43],"Existing":[44],"methods":[45,161],"rely":[47],"on":[48],"precise":[49],"calibration":[51],"or":[52],"struggle":[53],"with":[54],"large":[55],"perspective":[56,175],"To":[58],"address":[59],"these":[60],"limitations,":[61],"we":[62,114],"propose":[63],"ManiVID-3D,":[64],"novel":[66],"3D":[67,134],"RL":[68,136],"architecture":[69],"designed":[70],"for":[71,110,133,189],"robotic":[72,191],"manipulation,":[73],"which":[74],"learns":[75],"view-invariant":[76],"representations":[77,188],"through":[78],"self-supervised":[79],"disentangled":[80],"feature":[81],"learning.":[82],"The":[83,170],"framework":[84],"incorporates":[85],"ViewNet,":[86],"lightweight":[88],"yet":[89],"effective":[90],"module":[91,121],"that":[92,150],"automatically":[93],"aligns":[94],"point":[95],"cloud":[96],"observations":[97],"arbitrary":[99],"viewpoints":[100],"into":[101],"unified":[103],"spatial":[104],"coordinate":[105],"system":[106],"without":[107],"need":[109],"extrinsic":[111],"calibration.":[112],"Additionally,":[113],"develop":[115],"an":[116],"efficient":[117],"GPU-accelerated":[118],"batch":[119],"rendering":[120],"capable":[122],"of":[123,184],"processing":[124],"over":[125],"5000":[126],"frames":[127],"per":[128],"second,":[129],"enabling":[130],"large-scale":[131],"training":[132],"at":[137],"unprecedented":[138],"speeds.":[139],"Extensive":[140],"evaluation":[141],"across":[142],"10":[143],"simulated":[144],"and":[145,177],"5":[146],"tasks":[148],"demonstrates":[149],"our":[151],"approach":[152],"achieves":[153],"40.6%":[155],"higher":[156],"success":[157],"rate":[158],"than":[159],"state-of-the-art":[160],"under":[162],"variations":[164],"while":[165],"using":[166],"80%":[167],"fewer":[168],"parameters.":[169],"system's":[171],"robustness":[172],"severe":[174],"changes":[176],"strong":[178],"sim-to-real":[179],"performance":[180],"highlight":[181],"effectiveness":[183],"geometrically":[186],"consistent":[187],"scalable":[190],"unstructured":[194],"environments.":[195]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-02-10T00:00:00"}
