{"id":"https://openalex.org/W2147673046","doi":"https://doi.org/10.1109/iros.2004.1389768","title":"Robust speech interface based on audio and video information fusion for humanoid HRP-2","display_name":"Robust speech interface based on audio and video information fusion for humanoid HRP-2","publication_year":2005,"publication_date":"2005-04-01","ids":{"openalex":"https://openalex.org/W2147673046","doi":"https://doi.org/10.1109/iros.2004.1389768","mag":"2147673046"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2004.1389768","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389768","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109240590","display_name":"Isao Ham","orcid":null},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Isao Ham","raw_affiliation_strings":["Inf. Tech. Res. Inst., AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inf. Tech. Res. Inst., AIST, Tsukuba, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111950771","display_name":"F. Asano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"F. Asano","raw_affiliation_strings":["Information Technology Research Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Research Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060133835","display_name":"Yoshihiro Kawai","orcid":"https://orcid.org/0000-0002-9847-0072"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Y. Kawai","raw_affiliation_strings":["Intelligent Systems Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Systems Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071854390","display_name":"F. Kanchiro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"F. Kanchiro","raw_affiliation_strings":["Information Technology Research Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Research Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016899140","display_name":"Kiyoshi Yamamoto","orcid":"https://orcid.org/0000-0001-9351-5174"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"education","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Yamamoto","raw_affiliation_strings":["Tsukuba University, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsukuba University, Tsukuba, Japan","institution_ids":["https://openalex.org/I146399215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073671259","display_name":"Hideki Asoh","orcid":"https://orcid.org/0000-0002-0891-3782"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H. Asoh","raw_affiliation_strings":["Information Technology Research Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Research Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006504365","display_name":"Jun Ogata","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Ogata","raw_affiliation_strings":["Information Technology Research Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Research Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096841033","display_name":"N. Ichintura","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"N. Ichintura","raw_affiliation_strings":["Intelligent Systems Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Systems Institute, AIST, Tsukuba, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001711571","display_name":"Hirohisa Hirukawa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H. Hirukawa","raw_affiliation_strings":["Intelligent Systems Institute, AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Systems Institute, AIST, Tsukuba, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.257,"has_fulltext":false,"cited_by_count":113,"citation_normalized_percentile":{"value":0.99363588,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"3","issue":null,"first_page":"2404","last_page":"2410"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8059952259063721},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.6983234286308289},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5975682139396667},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5702999830245972},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.5246962904930115},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.49289393424987793},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.48915722966194153},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.48664936423301697},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.46715277433395386},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46499764919281006},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4123052954673767}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8059952259063721},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.6983234286308289},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5975682139396667},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5702999830245972},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.5246962904930115},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.49289393424987793},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.48915722966194153},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.48664936423301697},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.46715277433395386},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46499764919281006},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4123052954673767},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros.2004.1389768","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389768","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W106296304","https://openalex.org/W1869758581","https://openalex.org/W1963844140","https://openalex.org/W2071795835","https://openalex.org/W2113638573","https://openalex.org/W2132103241","https://openalex.org/W2147347395","https://openalex.org/W2166162370","https://openalex.org/W2320053039","https://openalex.org/W6681546959"],"related_works":["https://openalex.org/W1879255185","https://openalex.org/W2769861442","https://openalex.org/W2120442551","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264","https://openalex.org/W2011788874","https://openalex.org/W2041060376","https://openalex.org/W2122030153","https://openalex.org/W2963983801"],"abstract_inverted_index":{"For":[0],"cooperative":[1],"work":[2],"of":[3,48,59,69,103,108,154,185],"robots":[4,35],"and":[5,61,71,85,146,156],"humans":[6,44],"in":[7,26,115,178],"the":[8,67,101,130,134,152,166,186],"real":[9,29],"world,":[10],"a":[11,24,27,46,57,82,91,96,143,147,179],"communicative":[12],"function":[13,25],"based":[14,65],"on":[15,66,151],"speech":[16,41,63,104,110],"is":[17,32,112],"indispensable":[18],"for":[19,128,159],"robots.":[20],"To":[21],"realize":[22],"such":[23],"noisy":[28],"environment,":[30],"it":[31,174],"essential":[33],"that":[34,173],"be":[36,176],"able":[37],"to":[38,99,133],"extract":[39],"target":[40],"spoken":[42],"by":[43,50,95],"from":[45],"mixture":[47],"sounds":[49],"their":[51],"own":[52],"resources.":[53],"We":[54],"have":[55],"developed":[56],"method":[58],"detecting":[60],"extracting":[62],"events":[64,111],"fusion":[68],"audio":[70,77],"video":[72,86],"information.":[73],"In":[74,122],"this":[75,123],"method,":[76],"information":[78,87,107],"(sound":[79],"localization":[80],"using":[81,90,118],"microphone":[83,144],"array)":[84],"(human":[88],"tracking":[89,168],"camera)":[92],"are":[93,138],"fused":[94],"Bayesian":[97],"network":[98],"enable":[100],"detection":[102],"events.":[105],"The":[106],"detected":[109],"then":[113],"utilized":[114],"sound":[116,160],"separation":[117],"adaptive":[119],"beam":[120],"forming.":[121],"paper,":[124],"some":[125],"basic":[126],"investigations":[127],"applying":[129],"above":[131],"system":[132,169,187],"humanoid":[135],"robot":[136],"HRP-2":[137],"reported.":[139],"Input":[140],"devices,":[141],"namely":[142],"array":[145],"camera,":[148],"were":[149,163],"mounted":[150],"head":[153],"HRP-2,":[155],"acoustic":[157],"characteristics":[158],"localization/separation":[161],"performance":[162,184],"investigated.":[164],"Also,":[165],"human":[167],"was":[170,188],"improved":[171],"so":[172],"can":[175],"used":[177],"dynamic":[180],"situation.":[181],"Finally,":[182],"overall":[183],"tested":[189],"via":[190],"off-line":[191],"experiments.":[192]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
