{"id":"https://openalex.org/W2207851367","doi":"https://doi.org/10.1109/iros.2015.7353974","title":"Utilizing visual cues in robot audition for sound source discrimination in speech-based human-robot communication","display_name":"Utilizing visual cues in robot audition for sound source discrimination in speech-based human-robot communication","publication_year":2015,"publication_date":"2015-09-01","ids":{"openalex":"https://openalex.org/W2207851367","doi":"https://doi.org/10.1109/iros.2015.7353974","mag":"2207851367"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2015.7353974","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2015.7353974","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052470310","display_name":"Randy G\u00f3mez","orcid":"https://orcid.org/0000-0002-3191-6818"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Randy Gomez","raw_affiliation_strings":["Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079913820","display_name":"Levko Ivanchuk","orcid":null},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Levko Ivanchuk","raw_affiliation_strings":["Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049329632","display_name":"Keisuke Nakamura","orcid":"https://orcid.org/0000-0002-4979-2083"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Nakamura","raw_affiliation_strings":["Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110835440","display_name":"Takeshi Mizumoto","orcid":null},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takeshi Mizumoto","raw_affiliation_strings":["Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091708408","display_name":"Kazuhiro Nakadai","orcid":"https://orcid.org/0000-0002-6134-4558"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuhiro Nakadai","raw_affiliation_strings":["Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN"],"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan Co. Ltd., Wako-shi, Saitama, JAPAN","institution_ids":["https://openalex.org/I1283473643"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5052470310"],"corresponding_institution_ids":["https://openalex.org/I1283473643"],"apc_list":null,"apc_paid":null,"fwci":0.8615,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.74811964,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2","issue":null,"first_page":"4216","last_page":"4222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6938988566398621},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6566068530082703},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6209174990653992},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.5594644546508789},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.549182116985321},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.5279326438903809},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.4481625556945801},{"id":"https://openalex.org/keywords/acoustic-source-localization","display_name":"Acoustic source localization","score":0.4446813464164734},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4434756636619568},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.43365564942359924},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.41061896085739136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32089945673942566},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.18671858310699463},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.14333656430244446},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.12839862704277039},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10382989048957825},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06386512517929077}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6938988566398621},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6566068530082703},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6209174990653992},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.5594644546508789},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.549182116985321},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.5279326438903809},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.4481625556945801},{"id":"https://openalex.org/C93240960","wikidata":"https://www.wikidata.org/wiki/Q217270","display_name":"Acoustic source localization","level":3,"score":0.4446813464164734},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4434756636619568},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.43365564942359924},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.41061896085739136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32089945673942566},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.18671858310699463},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.14333656430244446},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.12839862704277039},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10382989048957825},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06386512517929077},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros.2015.7353974","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2015.7353974","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6200000047683716,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W82800962","https://openalex.org/W199515402","https://openalex.org/W1512562331","https://openalex.org/W2016565202","https://openalex.org/W2017519504","https://openalex.org/W2020660155","https://openalex.org/W2028592945","https://openalex.org/W2053241019","https://openalex.org/W2069432516","https://openalex.org/W2098479385","https://openalex.org/W2102072896","https://openalex.org/W2104422351","https://openalex.org/W2106259503","https://openalex.org/W2110034964","https://openalex.org/W2113416157","https://openalex.org/W2121065047","https://openalex.org/W2121788372","https://openalex.org/W2128653836","https://openalex.org/W2128847674","https://openalex.org/W2136197758","https://openalex.org/W2152949589","https://openalex.org/W2172226263","https://openalex.org/W2911394281","https://openalex.org/W2970850616","https://openalex.org/W4230562743","https://openalex.org/W6603434965","https://openalex.org/W6608016876","https://openalex.org/W6678139903","https://openalex.org/W6680064849","https://openalex.org/W6682371270","https://openalex.org/W6758139422","https://openalex.org/W6767055042"],"related_works":["https://openalex.org/W2069517613","https://openalex.org/W4386336391","https://openalex.org/W2383417046","https://openalex.org/W3198667862","https://openalex.org/W3171794711","https://openalex.org/W3217417050","https://openalex.org/W2248536413","https://openalex.org/W2187296505","https://openalex.org/W3036714475","https://openalex.org/W77875850"],"abstract_inverted_index":{"It":[0,92],"is":[1,13,29,44,77,84,227],"easy":[2],"for":[3,31,47,79,105],"human":[4,32],"beings":[5,33],"to":[6,34,63,96,102,118,136,144,163,168,187,191],"discern":[7],"whether":[8],"an":[9,85,106],"observed":[10],"acoustic":[11,27,74,201],"signal":[12,101,124,149],"a":[14,65,133,138],"direct":[15,66,122,231],"speech,":[16],"reflected":[17],"speech":[18,67,100,123,148,156,232,234],"or":[19],"noise":[20,127,160,238],"through":[21],"simple":[22],"listening.":[23],"Relying":[24],"purely":[25],"on":[26,200],"cues":[28,190],"enough":[30],"discriminate":[35,145],"between":[36],"the":[37,53,69,94,98,103,121,146,153,193],"different":[38],"kinds":[39],"of":[40,120,141,209],"sound":[41,71],"sources":[42,72],"which":[43,197],"not":[45],"straightforward":[46],"machines.":[48],"A":[49],"robot":[50,55,95,134,173,195],"equipped":[51],"with":[52],"current":[54],"audition":[56,83,174,196],"mechanism":[57],"in":[58,88,172,216,229],"most":[59],"cases,":[60],"will":[61,176],"fail":[62],"differentiate":[64],"from":[68,152,233],"other":[70],"because":[73],"information":[75,143],"alone":[76],"insufficient":[78],"effective":[80,107],"discrimination.":[81],"Robot":[82],"important":[86],"topic":[87],"speech-based":[89],"human-robot":[90,108,179],"communication.":[91,109],"enables":[93],"associate":[97],"incoming":[99],"user":[104],"In":[110,182],"challenging":[111],"environments,":[112],"this":[113,131,183],"task":[114],"becomes":[115],"difficult":[116],"due":[117],"reflections":[119,157,235],"and":[125,158,175,212,236],"background":[126,159,237],"sources.":[128,239],"To":[129],"counter":[130],"problem,":[132],"needs":[135],"have":[137],"minimum":[139],"amount":[140],"prior":[142],"valid":[147],"(direct":[150],"speech)":[151],"contaminants":[154],"(i.e.,":[155],"sources).":[161],"Failure":[162],"do":[164],"so":[165],"would":[166],"lead":[167],"false":[169],"speech-to-speaker":[170,210],"association":[171,211],"gravely":[177],"impact":[178],"communication":[180],"experience.":[181],"paper":[184],"we":[185],"propose":[186],"using":[188],"visual":[189],"augment":[192],"traditional":[194],"relies":[198],"solely":[199],"information.":[202],"The":[203],"proposed":[204],"method":[205],"significantly":[206],"improves":[207],"accuracy":[208],"machine":[213],"understanding":[214],"performance":[215],"real":[217],"environment":[218],"situation.":[219],"Experimental":[220],"results":[221],"show":[222],"that":[223],"our":[224],"expanded":[225],"system":[226],"robust":[228],"discriminating":[230]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
