{"id":"https://openalex.org/W4376481256","doi":"https://doi.org/10.1109/tcsvt.2023.3275814","title":"Cross-Level Multi-Modal Features Learning With Transformer for RGB-D Object Recognition","display_name":"Cross-Level Multi-Modal Features Learning With Transformer for RGB-D Object Recognition","publication_year":2023,"publication_date":"2023-05-12","ids":{"openalex":"https://openalex.org/W4376481256","doi":"https://doi.org/10.1109/tcsvt.2023.3275814"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3275814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3275814","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048357136","display_name":"Ying Zhang","orcid":"https://orcid.org/0000-0001-8982-8223"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ying Zhang","raw_affiliation_strings":["School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062723492","display_name":"Maoliang Yin","orcid":"https://orcid.org/0009-0003-8106-7521"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Maoliang Yin","raw_affiliation_strings":["School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101453706","display_name":"Heyong Wang","orcid":"https://orcid.org/0009-0006-7225-2137"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heyong Wang","raw_affiliation_strings":["School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100380201","display_name":"Changchun Hua","orcid":"https://orcid.org/0000-0001-6311-2112"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changchun Hua","raw_affiliation_strings":["School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, and the Key Laboratory of Intelligent Rehabilitation and Neuromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048357136"],"corresponding_institution_ids":["https://openalex.org/I39333907"],"apc_list":null,"apc_paid":null,"fwci":1.9412,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88113378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"33","issue":"12","first_page":"7121","last_page":"7130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.8094617128372192},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7480193376541138},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5980962514877319},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5886175632476807},{"id":"https://openalex.org/keywords/3d-single-object-recognition","display_name":"3D single-object recognition","score":0.5022895336151123},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.49843502044677734},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.49279072880744934},{"id":"https://openalex.org/keywords/cognitive-neuroscience-of-visual-object-recognition","display_name":"Cognitive neuroscience of visual object recognition","score":0.4912416338920593},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4882362186908722},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4848829209804535}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.8094617128372192},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7480193376541138},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5980962514877319},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5886175632476807},{"id":"https://openalex.org/C14551309","wikidata":"https://www.wikidata.org/wiki/Q4636325","display_name":"3D single-object recognition","level":4,"score":0.5022895336151123},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.49843502044677734},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.49279072880744934},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.4912416338920593},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4882362186908722},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4848829209804535}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3275814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3275814","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.5299999713897705}],"awards":[{"id":"https://openalex.org/G1530245929","display_name":null,"funder_award_id":"62203377","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G264141345","display_name":null,"funder_award_id":"F2022203098","funder_id":"https://openalex.org/F4320322163","funder_display_name":"Natural Science Foundation of Hebei Province"},{"id":"https://openalex.org/G5417780894","display_name":null,"funder_award_id":"U22A2050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5760530916","display_name":null,"funder_award_id":"62203378","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8808249639","display_name":null,"funder_award_id":"F2021203054","funder_id":"https://openalex.org/F4320322163","funder_display_name":"Natural Science Foundation of Hebei Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322163","display_name":"Natural Science Foundation of Hebei Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1593727536","https://openalex.org/W1677409904","https://openalex.org/W1686810756","https://openalex.org/W1849277567","https://openalex.org/W1922904362","https://openalex.org/W1939398876","https://openalex.org/W2031983228","https://openalex.org/W2074142320","https://openalex.org/W2151103935","https://openalex.org/W2156222070","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2335630758","https://openalex.org/W2416637517","https://openalex.org/W2618530766","https://openalex.org/W2758703625","https://openalex.org/W2774550181","https://openalex.org/W2860627718","https://openalex.org/W2884585870","https://openalex.org/W2897187502","https://openalex.org/W2962837662","https://openalex.org/W2963011558","https://openalex.org/W2963032410","https://openalex.org/W2963446712","https://openalex.org/W2963901718","https://openalex.org/W2963956866","https://openalex.org/W2967153639","https://openalex.org/W2988916019","https://openalex.org/W2992853260","https://openalex.org/W3008014442","https://openalex.org/W3019632826","https://openalex.org/W3033210410","https://openalex.org/W3094488141","https://openalex.org/W3096609285","https://openalex.org/W3099587965","https://openalex.org/W3119686997","https://openalex.org/W3156988133","https://openalex.org/W3157704410","https://openalex.org/W3164802490","https://openalex.org/W3195158006","https://openalex.org/W3205510322","https://openalex.org/W4200583170","https://openalex.org/W4205202009","https://openalex.org/W4220889379","https://openalex.org/W4253193475","https://openalex.org/W4285244585","https://openalex.org/W4293370997","https://openalex.org/W4299314646","https://openalex.org/W4308235695","https://openalex.org/W4309762368","https://openalex.org/W4312541788","https://openalex.org/W6637373629","https://openalex.org/W6676766825","https://openalex.org/W6682725716","https://openalex.org/W6703264295","https://openalex.org/W6739901393","https://openalex.org/W6766978945","https://openalex.org/W6779248606"],"related_works":["https://openalex.org/W196505194","https://openalex.org/W2578478211","https://openalex.org/W2207218974","https://openalex.org/W2387237626","https://openalex.org/W2182443753","https://openalex.org/W2403129783","https://openalex.org/W2188151889","https://openalex.org/W3006949615","https://openalex.org/W1841786463","https://openalex.org/W113382539"],"abstract_inverted_index":{"Object":[0,174,178],"recognition,":[1,52],"one":[2],"of":[3,7,26,57,118,151,158,184,197],"the":[4,23,40,55,84,116,149,156,193,198],"main":[5],"goals":[6],"robot":[8,86],"vision,":[9],"is":[10],"a":[11,81,97,140],"vital":[12],"prerequisite":[13],"for":[14,50,83,107],"service":[15,85],"robots":[16],"to":[17,22,87,113,147,154],"perform":[18],"domestic":[19],"tasks.":[20],"Thanks":[21],"rich":[24],"sense":[25],"information":[27],"provided":[28],"by":[29,128],"RGB-D":[30,108,173,202],"sensors,":[31],"RGB-D-based":[32],"object":[33,51,109,203],"recognition":[34,62,204],"has":[35],"received":[36],"increasing":[37],"attention.":[38],"However,":[39],"existing":[41],"works":[42],"focus":[43],"on":[44,61,168],"collaborative":[45],"RGB":[46],"and":[47,130,143,177,187,195],"depth":[48,58,160],"data":[49],"while":[53],"ignoring":[54],"influence":[56],"image":[59],"quality":[60],"performance.":[63],"Moreover,":[64],"in":[65,201],"real-world":[66],"scenarios,":[67],"there":[68],"are":[69,126],"many":[70],"objects":[71,89],"with":[72,102,115,182],"strong":[73],"similarity":[74],"from":[75],"certain":[76],"observation":[77],"angles,":[78],"which":[79],"poses":[80],"challenge":[82],"recognize":[88],"accurately.":[90],"In":[91,111],"this":[92],"paper,":[93],"we":[94,138],"propose":[95],"CNN-TransNet,":[96],"novel":[98],"end-to-end":[99],"Transformer-based":[100],"architecture":[101],"convolutional":[103],"neural":[104],"networks":[105],"(CNNs)":[106],"recognition.":[110],"order":[112],"deal":[114],"effect":[117],"high":[119],"inter-class":[120],"similarity,":[121],"discriminative":[122],"multi-modal":[123,132,141],"feature":[124],"representations":[125],"generated":[127],"learning":[129],"relating":[131],"features":[133],"at":[134],"multiple":[135],"levels.":[136],"Besides,":[137],"employ":[139],"fusion":[142],"projection":[144],"(MMFP)":[145],"module":[146],"reweight":[148],"contribution":[150],"each":[152],"modality":[153],"address":[155],"problem":[157],"poor-quality":[159],"image.":[161],"Our":[162],"proposed":[163,199],"approach":[164],"achieves":[165],"state-of-the-art":[166],"performance":[167],"three":[169],"datasets":[170],"(including":[171],"Washington":[172],"Dataset,":[175],"JHUIT-50,":[176],"Clutter":[179],"Indoor":[180],"Dataset),":[181],"accuracy":[183],"95.4%,":[185],"98.1%,":[186],"94.7%,":[188],"respectively.":[189],"The":[190],"results":[191],"demonstrate":[192],"effectiveness":[194],"superiority":[196],"model":[200],"task.":[205]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-23T07:41:27.035349","created_date":"2025-10-10T00:00:00"}
