{"id":"https://openalex.org/W4407168289","doi":"https://doi.org/10.1109/tro.2025.3539193","title":"Fusion-Perception-to-Action Transformer: Enhancing Robotic Manipulation With 3-D Visual Fusion Attention and Proprioception","display_name":"Fusion-Perception-to-Action Transformer: Enhancing Robotic Manipulation With 3-D Visual Fusion Attention and Proprioception","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4407168289","doi":"https://doi.org/10.1109/tro.2025.3539193"},"language":"en","primary_location":{"id":"doi:10.1109/tro.2025.3539193","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tro.2025.3539193","pdf_url":null,"source":{"id":"https://openalex.org/S144620930","display_name":"IEEE Transactions on Robotics","issn_l":"1552-3098","issn":["1552-3098","1941-0468"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yangjun Liu","orcid":"https://orcid.org/0000-0002-8170-2463"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]},{"id":"https://openalex.org/I6469544","display_name":"City University of Macau","ror":"https://ror.org/04gpd4q15","country_code":"MO","type":"education","lineage":["https://openalex.org/I6469544"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Yangjun Liu","raw_affiliation_strings":["State Key Laboratory of Internet of Things for Smart City, Centre for Artificial Intelligence and Robotics, Department of Electromechanical Engineering, University of Macau, Macau, China","State Key Laboratory of Internet of Things for Smart City, and Centre for Artificial Intelligence and Robotics, and Department of Electromechanical Engineering, University of Macau, Macau, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Internet of Things for Smart City, Centre for Artificial Intelligence and Robotics, Department of Electromechanical Engineering, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]},{"raw_affiliation_string":"State Key Laboratory of Internet of Things for Smart City, and Centre for Artificial Intelligence and Robotics, and Department of Electromechanical Engineering, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101788185","display_name":"Sheng Liu","orcid":"https://orcid.org/0000-0001-5790-4324"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Liu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Binghan Chen","orcid":"https://orcid.org/0000-0001-6780-8475"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Binghan Chen","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011123518","display_name":"Zhi-Xin Yang","orcid":"https://orcid.org/0000-0001-9151-7758"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]},{"id":"https://openalex.org/I6469544","display_name":"City University of Macau","ror":"https://ror.org/04gpd4q15","country_code":"MO","type":"education","lineage":["https://openalex.org/I6469544"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Zhi-Xin Yang","raw_affiliation_strings":["State Key Laboratory of Internet of Things for Smart City, Centre for Artificial Intelligence and Robotics, Department of Electromechanical Engineering, University of Macau, Macau, China","State Key Laboratory of Internet of Things for Smart City, and Centre for Artificial Intelligence and Robotics, and Department of Electromechanical Engineering, University of Macau, Macau, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Internet of Things for Smart City, Centre for Artificial Intelligence and Robotics, Department of Electromechanical Engineering, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]},{"raw_affiliation_string":"State Key Laboratory of Internet of Things for Smart City, and Centre for Artificial Intelligence and Robotics, and Department of Electromechanical Engineering, University of Macau, Macau, China","institution_ids":["https://openalex.org/I6469544","https://openalex.org/I204512498"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110929746","display_name":"Sheng Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Xu","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I204512498","https://openalex.org/I6469544"],"apc_list":null,"apc_paid":null,"fwci":11.6169,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.98551231,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"41","issue":null,"first_page":"1553","last_page":"1567"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9416000247001648,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9416000247001648,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6031819581985474},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.6021813154220581},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5895432829856873},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.579677164554596},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5610809326171875},{"id":"https://openalex.org/keywords/proprioception","display_name":"Proprioception","score":0.5596959590911865},{"id":"https://openalex.org/keywords/active-perception","display_name":"Active perception","score":0.5024456977844238},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4761008024215698},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.4450221359729767},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4394325315952301},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4107043445110321},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.3538493514060974},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.30513614416122437},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.2800913155078888},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10152974724769592},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08466118574142456},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.0767330527305603}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6031819581985474},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.6021813154220581},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5895432829856873},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.579677164554596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5610809326171875},{"id":"https://openalex.org/C171790689","wikidata":"https://www.wikidata.org/wiki/Q1129066","display_name":"Proprioception","level":2,"score":0.5596959590911865},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.5024456977844238},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4761008024215698},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.4450221359729767},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4394325315952301},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4107043445110321},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.3538493514060974},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.30513614416122437},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2800913155078888},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10152974724769592},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08466118574142456},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0767330527305603},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tro.2025.3539193","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tro.2025.3539193","pdf_url":null,"source":{"id":"https://openalex.org/S144620930","display_name":"IEEE Transactions on Robotics","issn_l":"1552-3098","issn":["1552-3098","1941-0468"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Robotics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3587908381","display_name":null,"funder_award_id":"2022B1515120010","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G4074719757","display_name":null,"funder_award_id":"62461160260","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4720968450","display_name":null,"funder_award_id":"2023A0505030003","funder_id":"https://openalex.org/F4320324202","funder_display_name":"Guangdong Science and Technology Department"},{"id":"https://openalex.org/G5335157455","display_name":null,"funder_award_id":"2020B1515130001","funder_id":"https://openalex.org/F4320324202","funder_display_name":"Guangdong Science and Technology Department"},{"id":"https://openalex.org/G5671542690","display_name":null,"funder_award_id":"MYRG-GRG2023-00237-FST-UMDF","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G6090389588","display_name":null,"funder_award_id":"MYRG-GRG2024-00299-FST","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G6547727834","display_name":null,"funder_award_id":"U22A2064","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7493990453","display_name":null,"funder_award_id":"62273327","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322841","display_name":"Universidade de Macau","ror":"https://ror.org/01r4q9n85"},{"id":"https://openalex.org/F4320324202","display_name":"Guangdong Science and Technology Department","ror":"https://ror.org/00tjzgn92"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1920022804","https://openalex.org/W1971086298","https://openalex.org/W2194775991","https://openalex.org/W2574247547","https://openalex.org/W2778092082","https://openalex.org/W2970710335","https://openalex.org/W2973325524","https://openalex.org/W2996355317","https://openalex.org/W3007769740","https://openalex.org/W3008105217","https://openalex.org/W3035662013","https://openalex.org/W3047385205","https://openalex.org/W3089931053","https://openalex.org/W3094502228","https://openalex.org/W3134664367","https://openalex.org/W3174849255","https://openalex.org/W3208245500","https://openalex.org/W4206944021","https://openalex.org/W4225947020","https://openalex.org/W4283726971","https://openalex.org/W4312712504","https://openalex.org/W4312728926","https://openalex.org/W4323767275","https://openalex.org/W4327522633","https://openalex.org/W4382366145","https://openalex.org/W4385245566","https://openalex.org/W4385403811","https://openalex.org/W4385626951","https://openalex.org/W4386066287","https://openalex.org/W4388692454","https://openalex.org/W4388979610","https://openalex.org/W4390778533","https://openalex.org/W4390782051","https://openalex.org/W4390873750","https://openalex.org/W4400228510","https://openalex.org/W6682849425","https://openalex.org/W6747473740","https://openalex.org/W6762287338","https://openalex.org/W6791353385","https://openalex.org/W6799838802","https://openalex.org/W6801810553","https://openalex.org/W6839446344","https://openalex.org/W6843759960","https://openalex.org/W6854091356","https://openalex.org/W6854108904","https://openalex.org/W6855562989","https://openalex.org/W6856810218"],"related_works":["https://openalex.org/W193314751","https://openalex.org/W4253813669","https://openalex.org/W2014373404","https://openalex.org/W2053711990","https://openalex.org/W2132659060","https://openalex.org/W2031992971","https://openalex.org/W3214791684","https://openalex.org/W2152662039","https://openalex.org/W2106688486","https://openalex.org/W4308237304"],"abstract_inverted_index":{"Most":[0],"prior":[1],"robot":[2],"learning":[3],"methods":[4,29,196,201],"focus":[5],"on":[6],"image-based":[7],"observations,":[8],"limiting":[9],"their":[10],"capability":[11],"in":[12,71,202],"3-D":[13,72,78,111],"robotic":[14],"manipulation.":[15],"Voxel":[16],"representation":[17],"naturally":[18],"delivers":[19],"rich":[20],"spatial":[21,123],"features":[22],"but":[23],"remains":[24],"underutilized.":[25],"Specifically,":[26],"current":[27],"voxel-based":[28],"struggle":[30],"with":[31,63],"fine-grained":[32,69],"tasks,":[33],"since":[34],"precise":[35,146],"actions":[36,179],"are":[37],"not":[38],"fully":[39],"achievable.":[40],"However,":[41],"humans":[42],"can":[43,120],"accomplish":[44],"these":[45],"tasks":[46],"well":[47],"using":[48],"vision":[49],"and":[50,93,106,118,162,186,192,205],"proprioception.":[51],"Inspired":[52],"by":[53,133],"this,":[54],"this":[55],"article":[56],"proposed":[57],"a":[58,76,110,150,171],"novel":[59],"Fusion-Perception-to-Action":[60],"Transformer":[61],"(FP2AT)":[62],"cross-layer":[64],"feature":[65],"aggregation":[66],"to":[67,85,88,155,183],"handle":[68],"manipulation":[70],"space.":[73],"In":[74,148,189],"particular,":[75],"multiscale":[77],"visual":[79,104,112],"fusion":[80],"attention":[81,87,114],"mechanism":[82,115],"is":[83,116,153,181],"devised":[84],"draw":[86],"local":[89],"regions":[90],"of":[91,96,103,131,159,167,177],"interest":[92],"maintain":[94],"awareness":[95],"global":[97],"scenes,":[98],"thereby":[99],"boosting":[100],"the":[101,129,141,157,165,168,174],"capabilities":[102],"perception":[105,158],"action":[107,142],"planning.":[108],"Meanwhile,":[109],"mutual":[113],"designed":[117],"it":[119],"also":[121],"enhance":[122],"perception.":[124],"Besides,":[125],"we":[126],"further":[127],"explore":[128],"potential":[130],"FP2AT":[132],"developing":[134],"its":[135],"coarse-to-fine":[136],"version,":[137],"which":[138],"progressively":[139],"refines":[140],"space":[143],"for":[144],"more":[145],"predictions.":[147],"addition,":[149],"proprioceptive":[151],"encoder":[152],"developed":[154],"mimic":[156],"body":[160],"movements":[161],"contact,":[163],"elevating":[164],"effectiveness":[166],"FP2AT.":[169],"Furthermore,":[170],"new":[172],"metric,":[173],"average":[175],"number":[176],"key":[178],"(ANKA),":[180],"introduced":[182],"evaluate":[184],"efficiency":[185],"planning":[187],"capability.":[188],"various":[190],"simulated":[191],"real-robot":[193],"examples,":[194],"our":[195],"significantly":[197],"outperform":[198],"state-of-the-art":[199],"3-D-vision-based":[200],"success":[203],"rate":[204],"ANKA":[206],"metrics.":[207]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
