{"id":"https://openalex.org/W4221141683","doi":"https://doi.org/10.1109/icassp43922.2022.9746336","title":"Audio-Visual Object Classification for Human-Robot Collaboration","display_name":"Audio-Visual Object Classification for Human-Robot Collaboration","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4221141683","doi":"https://doi.org/10.1109/icassp43922.2022.9746336"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746336","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746336","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://infoscience.epfl.ch/record/299331","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078453520","display_name":"Alessio Xompero","orcid":"https://orcid.org/0000-0002-8227-8529"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"A. Xompero","raw_affiliation_strings":["Queen Mary University of London,UK","Queen Mary University of London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,UK","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"Queen Mary University of London, UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085979475","display_name":"Yanxu Pang","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Y. L. Pang","raw_affiliation_strings":["Queen Mary University of London,UK","Queen Mary University of London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,UK","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"Queen Mary University of London, UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079387432","display_name":"T. Patten","orcid":null},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"T. Patten","raw_affiliation_strings":["University of Technology Sydney,Australia","University of Technology Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Technology Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034558174","display_name":"A. Prabhakar","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"A. Prabhakar","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne,Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne,Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057208375","display_name":"B. Calli","orcid":null},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"B. Calli","raw_affiliation_strings":["Worcester Polytechnic Institute,USA","Worcester Polytechnic Institute, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute,USA","institution_ids":["https://openalex.org/I107077323"]},{"raw_affiliation_string":"Worcester Polytechnic Institute, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049919186","display_name":"A. Cavallaro","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"A. Cavallaro","raw_affiliation_strings":["Queen Mary University of London,UK","Queen Mary University of London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,UK","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"Queen Mary University of London, UK","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0522,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.69742403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"37","issue":null,"first_page":"9137","last_page":"9141"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7669709920883179},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7514734864234924},{"id":"https://openalex.org/keywords/container","display_name":"Container (type theory)","score":0.7321426272392273},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6766098737716675},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6447794437408447},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5890403389930725},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.5876651406288147},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5433961749076843},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.518441915512085},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.5080380439758301},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4956604242324829},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4532540440559387},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.43856775760650635},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.42060309648513794},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3915708661079407},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.15396225452423096},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0916261374950409}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7669709920883179},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7514734864234924},{"id":"https://openalex.org/C2781018962","wikidata":"https://www.wikidata.org/wiki/Q5164884","display_name":"Container (type theory)","level":2,"score":0.7321426272392273},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6766098737716675},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6447794437408447},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5890403389930725},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.5876651406288147},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5433961749076843},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.518441915512085},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.5080380439758301},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4956604242324829},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4532540440559387},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.43856775760650635},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.42060309648513794},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3915708661079407},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.15396225452423096},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0916261374950409},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746336","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746336","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:299331","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/299331","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WoS","raw_type":"conference proceedings"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:299331","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/299331","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WoS","raw_type":"conference proceedings"},"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[{"id":"https://openalex.org/G5628197298","display_name":null,"funder_award_id":"EP/S031715/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5892560885","display_name":"Robot In-hand Dexterous manipulation by extracting data from human manipulation of objects to improve robotic autonomy and dexterity","funder_award_id":"ANR-18-CHR3-0004","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2064675550","https://openalex.org/W2569141289","https://openalex.org/W2962745199","https://openalex.org/W3001418026","https://openalex.org/W3006978142","https://openalex.org/W3015249398","https://openalex.org/W3089045049","https://openalex.org/W3094502228","https://openalex.org/W3112437495","https://openalex.org/W3132357664","https://openalex.org/W3132496303","https://openalex.org/W3132647604","https://openalex.org/W3134505513","https://openalex.org/W3146411767","https://openalex.org/W3172917672","https://openalex.org/W3194569790","https://openalex.org/W3195780658","https://openalex.org/W3205994030","https://openalex.org/W3215920455","https://openalex.org/W4221151562","https://openalex.org/W4224932139","https://openalex.org/W4226376635","https://openalex.org/W4297775537","https://openalex.org/W6737664043","https://openalex.org/W6784333009","https://openalex.org/W6804216487","https://openalex.org/W6925342505"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W2367301169","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W2352134912","https://openalex.org/W3137890128","https://openalex.org/W1984634519"],"abstract_inverted_index":{"Human-robot":[0],"collaboration":[1],"requires":[2],"the":[3,7,38,58,61,75,83,86,98,101,104,110,114,117,128,138],"contactless":[4],"estimation":[5,102,141],"of":[6,10,41,60,85,91,97,103,109,116,121,127,140],"physical":[8,39],"properties":[9,40],"containers":[11],"manipulated":[12],"by":[13,57],"a":[14,22,26,79,89],"person,":[15],"for":[16,134],"example":[17],"while":[18],"pouring":[19],"content":[20],"in":[21,48,69,143],"cup":[23],"or":[24],"moving":[25],"food":[27],"box.":[28],"Acoustic":[29],"and":[30,51,53,66,78,107,113,119,136],"visual":[31],"signals":[32],"can":[33],"be":[34,55],"used":[35],"to":[36,81],"estimate":[37],"such":[42],"objects,":[43],"which":[44],"may":[45],"vary":[46],"substantially":[47],"shape,":[49],"material":[50],"size,":[52],"also":[54],"occluded":[56],"hands":[59],"person.":[62],"To":[63],"facilitate":[64],"comparisons":[65],"stimulate":[67],"progress":[68],"solving":[70],"this":[71],"problem,":[72],"we":[73],"present":[74],"CORSMAL":[76],"challenge":[77,99,129],"dataset":[80],"assess":[82],"performance":[84,93],"algorithms":[87],"through":[88],"set":[90],"well-defined":[92],"scores.":[94],"The":[95],"tasks":[96],"are":[100],"mass,":[105],"capacity,":[106],"dimensions":[108],"object":[111],"(container),":[112],"classification":[115],"type":[118],"amount":[120],"its":[122],"content.":[123],"A":[124],"novel":[125],"feature":[126],"is":[130],"our":[131],"real-to-simulation":[132],"framework":[133],"visualising":[135],"assessing":[137],"impact":[139],"errors":[142],"human-to-robot":[144],"handovers.":[145]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
