{"id":"https://openalex.org/W1987454298","doi":"https://doi.org/10.1109/icra.2014.6907840","title":"Audio-visual keyword spotting based on adaptive decision fusion under noisy conditions for human-robot interaction","display_name":"Audio-visual keyword spotting based on adaptive decision fusion under noisy conditions for human-robot interaction","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W1987454298","doi":"https://doi.org/10.1109/icra.2014.6907840","mag":"1987454298"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2014.6907840","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2014.6907840","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100410286","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-0896-8409"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Peking University, Beijing, CHINA"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Peking University, Beijing, CHINA","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061734606","display_name":"Ting Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Fan","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Shenzhen Graduate School of Peking University, Shenzhen, CHINA"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Shenzhen Graduate School of Peking University, Shenzhen, CHINA","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101459282","display_name":"Pingping Wu","orcid":"https://orcid.org/0000-0002-7028-4200"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pingping Wu","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Shenzhen Graduate School of Peking University, Shenzhen, CHINA"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things(ELIP), Shenzhen Graduate School of Peking University, Shenzhen, CHINA","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100410286"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":1.4703,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.81700132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6644","last_page":"6651"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.9438493251800537},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.8188555240631104},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.813572883605957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6185671091079712},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.6020917296409607},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5769761204719543},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5472356081008911},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4977705776691437},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43653759360313416},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.37704595923423767}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.9438493251800537},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.8188555240631104},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.813572883605957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6185671091079712},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.6020917296409607},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5769761204719543},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5472356081008911},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4977705776691437},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43653759360313416},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.37704595923423767},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra.2014.6907840","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2014.6907840","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.800000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1520736967","https://openalex.org/W1530111162","https://openalex.org/W1664248204","https://openalex.org/W2015132652","https://openalex.org/W2028547769","https://openalex.org/W2036205336","https://openalex.org/W2044381054","https://openalex.org/W2051676197","https://openalex.org/W2064347532","https://openalex.org/W2096703356","https://openalex.org/W2100191526","https://openalex.org/W2100930826","https://openalex.org/W2122797512","https://openalex.org/W2125838338","https://openalex.org/W2127025755","https://openalex.org/W2134723767","https://openalex.org/W2136155248","https://openalex.org/W2139916508","https://openalex.org/W2150444217","https://openalex.org/W2150946027","https://openalex.org/W2155289555","https://openalex.org/W2157827878","https://openalex.org/W2163352848","https://openalex.org/W2163973301","https://openalex.org/W2322283567","https://openalex.org/W2405666970","https://openalex.org/W4285719527","https://openalex.org/W4301408009","https://openalex.org/W6631338322","https://openalex.org/W6637145857","https://openalex.org/W6679949672","https://openalex.org/W6682704335","https://openalex.org/W6713568955"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W2114097550","https://openalex.org/W3119978414","https://openalex.org/W2516975559","https://openalex.org/W2545741539","https://openalex.org/W3206647229","https://openalex.org/W4286904253","https://openalex.org/W1969408022","https://openalex.org/W2000885660","https://openalex.org/W1483316057"],"abstract_inverted_index":{"Keyword":[0],"spotting":[1,68,161],"(KWS)":[2],"deals":[3],"with":[4,38],"the":[5,28,49,59,85,151,168],"identification":[6],"of":[7,31,89,150],"keywords":[8],"in":[9],"unconstrained":[10],"speech,":[11],"which":[12],"is":[13,78,102,116],"a":[14,64,105,144],"natural,":[15],"straightforward":[16],"and":[17,87,112,118,122,134,171],"friendly":[18],"way":[19],"for":[20,125,131],"human-robot":[21],"interaction":[22],"(HRI).":[23],"Most":[24],"keyword":[25,67,114,127,160],"spotters":[26],"have":[27],"common":[29],"problem":[30],"noise-robustness":[32],"when":[33],"applied":[34],"to":[35,56,82],"real-world":[36],"environment":[37],"dramatically":[39],"changing":[40],"noises.":[41],"Since":[42],"visual":[43,113,123,135],"information":[44],"won't":[45],"be":[46,54],"affected":[47],"by":[48],"acoustic":[50,111,121,133],"noise,":[51],"it":[52],"can":[53],"utilized":[55],"complementarily":[57],"improve":[58],"noise-robustness.":[60],"In":[61,80],"this":[62],"paper,":[63],"novel":[65],"audio-visual":[66,159],"approach":[69],"based":[70,109,147,162,178],"on":[71,110,148,163],"adaptive":[72],"decision":[73,164],"fusion":[74,165,177],"under":[75,137],"noisy":[76],"conditions":[77,140],"proposed.":[79,103],"order":[81],"accurately":[83],"represent":[84],"appearance":[86],"movement":[88],"mouth":[90],"region,":[91],"an":[92],"improved":[93],"local":[94],"binary":[95],"pattern":[96],"from":[97],"three":[98],"orthogonal":[99],"planes":[100],"(ILBP-TOP)":[101],"Besides,":[104],"parallel":[106],"two-step":[107],"recognition":[108],"candidates":[115],"conducted":[117],"generates":[119],"corresponding":[120],"scores":[124],"each":[126],"candidate.":[128],"Optimal":[129],"weights":[130],"combining":[132],"contributions":[136],"diverse":[138],"noise":[139,169],"are":[141],"generated":[142],"using":[143],"neural":[145],"network":[146],"reliabilities":[149],"two":[152],"modalities.":[153],"Experiments":[154],"show":[155],"that":[156],"our":[157],"proposed":[158],"significantly":[166],"improves":[167],"robustness":[170],"attains":[172],"better":[173],"performance":[174,186],"than":[175,187],"feature":[176],"audiovisual":[179],"spotter.":[180],"Additionally,":[181],"ILBP-TOP":[182],"shows":[183],"more":[184],"competitive":[185],"LBP-TOP.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
