{"id":"https://openalex.org/W4389666289","doi":"https://doi.org/10.1109/iros55552.2023.10342235","title":"Enhancing Fine-Grained 3D Object Recognition Using Hybrid Multi-Modal Vision Transformer-CNN Models","display_name":"Enhancing Fine-Grained 3D Object Recognition Using Hybrid Multi-Modal Vision Transformer-CNN Models","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389666289","doi":"https://doi.org/10.1109/iros55552.2023.10342235"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342235","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.rug.nl/en/publications/f05bc75e-9720-4799-bee5-c698cc9609ad","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101187848","display_name":"Songsong Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Songsong Xiong","raw_affiliation_strings":["University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004690448","display_name":"Georgios Tziafas","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Georgios Tziafas","raw_affiliation_strings":["University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008655688","display_name":"Hamidreza Kasaei","orcid":"https://orcid.org/0000-0001-9408-7730"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Hamidreza Kasaei","raw_affiliation_strings":["University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Groningen,Department of Artificial Intelligence,Groningen,The Netherlands","institution_ids":["https://openalex.org/I169381384"]},{"raw_affiliation_string":"Department of Artificial Intelligence, University of Groningen, Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101187848"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":1.2372,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.82335173,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"5751","last_page":"5757"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7924965620040894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7292410135269165},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7112084627151489},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5570995211601257},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5284100770950317},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5237557888031006},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5172953605651855},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5116446614265442},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.501009464263916},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4793972969055176},{"id":"https://openalex.org/keywords/cognitive-neuroscience-of-visual-object-recognition","display_name":"Cognitive neuroscience of visual object recognition","score":0.46355050802230835},{"id":"https://openalex.org/keywords/economic-shortage","display_name":"Economic shortage","score":0.45511889457702637},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.37283942103385925},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37181442975997925},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.321115642786026},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06784296035766602}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7924965620040894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7292410135269165},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7112084627151489},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5570995211601257},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5284100770950317},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5237557888031006},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5172953605651855},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5116446614265442},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.501009464263916},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4793972969055176},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.46355050802230835},{"id":"https://openalex.org/C194051981","wikidata":"https://www.wikidata.org/wiki/Q1337691","display_name":"Economic shortage","level":3,"score":0.45511889457702637},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.37283942103385925},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37181442975997925},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.321115642786026},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06784296035766602},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iros55552.2023.10342235","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:publications/f05bc75e-9720-4799-bee5-c698cc9609ad","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/f05bc75e-9720-4799-bee5-c698cc9609ad","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Xiong, S, Tziafas, G & Kasaei, H 2023, Enhancing Fine-Grained 3D Object Recognition Using Hybrid Multi-Modal Vision Transformer-CNN Models. in 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023. IEEE International Conference on Intelligent Robots and Systems, Institute of Electrical and Electronics Engineers Inc., pp. 5751-5757, 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023, Detroit, United States, 01/10/2023. https://doi.org/10.1109/IROS55552.2023.10342235","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.rug.nl:openaire/f05bc75e-9720-4799-bee5-c698cc9609ad","is_oa":true,"landing_page_url":"https://hdl.handle.net/11370/f05bc75e-9720-4799-bee5-c698cc9609ad","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Xiong, S, Tziafas, G & Kasaei, H 2023, Enhancing Fine-Grained 3D Object Recognition Using Hybrid Multi-Modal Vision Transformer-CNN Models. in 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023. IEEE International Conference on Intelligent Robots and Systems, Institute of Electrical and Electronics Engineers Inc., pp. 5751-5757, 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023, Detroit, United States, 01/10/2023. https://doi.org/10.1109/IROS55552.2023.10342235","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.rug.nl:publications/f05bc75e-9720-4799-bee5-c698cc9609ad","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/f05bc75e-9720-4799-bee5-c698cc9609ad","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Xiong, S, Tziafas, G & Kasaei, H 2023, Enhancing Fine-Grained 3D Object Recognition Using Hybrid Multi-Modal Vision Transformer-CNN Models. in 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023. IEEE International Conference on Intelligent Robots and Systems, Institute of Electrical and Electronics Engineers Inc., pp. 5751-5757, 2023 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2023, Detroit, United States, 01/10/2023. https://doi.org/10.1109/IROS55552.2023.10342235","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320933","display_name":"Rijksuniversiteit Groningen","ror":"https://ror.org/012p63287"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W125693051","https://openalex.org/W964460774","https://openalex.org/W1797268635","https://openalex.org/W1846799578","https://openalex.org/W1944630830","https://openalex.org/W1972630525","https://openalex.org/W2008213039","https://openalex.org/W2074099390","https://openalex.org/W2118585731","https://openalex.org/W2124940540","https://openalex.org/W2138011018","https://openalex.org/W2150856297","https://openalex.org/W2156222070","https://openalex.org/W2219155316","https://openalex.org/W2501219359","https://openalex.org/W2569298377","https://openalex.org/W2618530766","https://openalex.org/W2740558313","https://openalex.org/W2763070548","https://openalex.org/W2774116089","https://openalex.org/W2780418809","https://openalex.org/W2789835518","https://openalex.org/W2883502031","https://openalex.org/W2895359088","https://openalex.org/W2896457183","https://openalex.org/W2902986194","https://openalex.org/W2911964244","https://openalex.org/W2914924014","https://openalex.org/W2920456178","https://openalex.org/W2963446712","https://openalex.org/W2963918968","https://openalex.org/W2964110616","https://openalex.org/W2986456235","https://openalex.org/W3034687313","https://openalex.org/W3096609285","https://openalex.org/W3115390238","https://openalex.org/W3124312618","https://openalex.org/W3129197028","https://openalex.org/W3136875090","https://openalex.org/W3138516171","https://openalex.org/W3139434170","https://openalex.org/W3168649818","https://openalex.org/W3213192039","https://openalex.org/W4206237889","https://openalex.org/W4211049957","https://openalex.org/W4238861924","https://openalex.org/W4285102264","https://openalex.org/W4287022992","https://openalex.org/W4297957988","https://openalex.org/W4313156423","https://openalex.org/W4401149187","https://openalex.org/W6631943919","https://openalex.org/W6638677478","https://openalex.org/W6677656871","https://openalex.org/W6755207826","https://openalex.org/W6759022161","https://openalex.org/W6767612737","https://openalex.org/W6788023325","https://openalex.org/W6788135285","https://openalex.org/W6790732931","https://openalex.org/W6800217721","https://openalex.org/W6898608453"],"related_works":["https://openalex.org/W2509147714","https://openalex.org/W2579473328","https://openalex.org/W46955353","https://openalex.org/W4239898203","https://openalex.org/W2955295882","https://openalex.org/W2603666417","https://openalex.org/W2145869671","https://openalex.org/W2563217655","https://openalex.org/W2357816140","https://openalex.org/W2586833044"],"abstract_inverted_index":{"Robots":[0],"operating":[1],"in":[2,20,36,60,203],"human-centered":[3],"environments,":[4],"such":[5],"as":[6,198],"retail":[7],"stores,":[8],"restaurants,":[9],"and":[10,43,76,135,147,156,161,182,194,206],"households,":[11],"are":[12],"often":[13],"required":[14],"to":[15,39,82,112,174,178],"distinguish":[16],"between":[17],"similar":[18],"objects":[19],"different":[21],"contexts":[22],"with":[23,114,190],"a":[24,34,57,70,115,151,191,199],"high":[25,41],"degree":[26],"of":[27,52,86,95,108,117,154],"accuracy.":[28],"However,":[29],"fine-grained":[30,53,87,200],"object":[31],"recognition":[32,152],"remains":[33],"challenge":[35],"robotics":[37],"due":[38],"the":[40,49,84,93,121,136,159,175],"intra-category":[42],"low":[44],"inter-category":[45],"dissimilarities.":[46],"In":[47,65],"addition,":[48],"limited":[50],"number":[51],"3D":[54,97],"datasets":[55,172],"poses":[56],"significant":[58],"problem":[59],"addressing":[61],"this":[62,66],"issue":[63],"effectively.":[64],"paper,":[67],"we":[68,99,166,185],"propose":[69],"hybrid":[71,141],"multi-modal":[72,142],"Vision":[73],"Transformer":[74],"(ViT)":[75],"Convolutional":[77],"Neural":[78],"Networks":[79],"(CNN)":[80],"approach":[81,129],"improve":[83],"performance":[85],"visual":[88],"classification":[89],"(FGVC).":[90],"To":[91],"address":[92],"shortage":[94],"FGVC":[96,170],"datasets,":[98,134,163],"generated":[100],"two":[101],"synthetic":[102],"datasets.":[103],"The":[104],"first":[105],"dataset":[106,123],"consists":[107],"20":[109],"categories":[110],"related":[111],"restaurants":[113],"total":[116],"100":[118],"instances,":[119],"while":[120],"second":[122],"contains":[124],"120":[125],"shoe":[126,162],"instances.":[127],"Our":[128],"was":[130],"evaluated":[131],"on":[132,158],"both":[133,145,204],"results":[137],"indicate":[138],"that":[139],"our":[140,169,187],"model":[143],"outperforms":[144],"CNN-only":[146],"ViT-only":[148],"baselines,":[149],"achieving":[150],"accuracy":[153],"94.50%":[155],"93.51%":[157],"restaurant":[160],"respectively.":[164],"Additionally,":[165],"have":[167],"made":[168],"RGB-D":[171],"available":[173],"research":[176],"community":[177],"enable":[179],"further":[180],"experimentation":[181],"advancement.":[183],"Furthermore,":[184],"integrated":[186],"proposed":[188],"method":[189],"robot":[192],"framework":[193],"demonstrated":[195],"its":[196],"potential":[197],"perception":[201],"tool":[202],"simulated":[205],"real-world":[207],"robotic":[208],"scenarios.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
