{"id":"https://openalex.org/W191108438","doi":"https://doi.org/10.21437/interspeech.2009-636","title":"Robust speech recognition using VAD-measure-embedded decoder","display_name":"Robust speech recognition using VAD-measure-embedded decoder","publication_year":2009,"publication_date":"2009-09-06","ids":{"openalex":"https://openalex.org/W191108438","doi":"https://doi.org/10.21437/interspeech.2009-636","mag":"191108438"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2009-636","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2009-636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2009","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100590172","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034306438","display_name":"Tasuku Oonishi","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tasuku Oonishi","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049896100","display_name":"Paul R. Dixon","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Paul R. Dixon","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084374665","display_name":"Koji Iwano","orcid":"https://orcid.org/0000-0002-7373-5353"},"institutions":[{"id":"https://openalex.org/I185088104","display_name":"Tokyo City University","ror":"https://ror.org/04dt6bw53","country_code":"JP","type":"education","lineage":["https://openalex.org/I185088104"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koji Iwano","raw_affiliation_strings":["Tokyo City University, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tokyo City University, Tokyo, Japan","institution_ids":["https://openalex.org/I185088104"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009532108","display_name":"Sadaoki Furui","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sadaoki Furui","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3572,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.5267934,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2239","last_page":"2242"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.8441557884216309},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8239201307296753},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.796978771686554},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.46248918771743774},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.454396516084671},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.4459373950958252},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.43427830934524536},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.42725181579589844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37702444195747375},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3291996717453003}],"concepts":[{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.8441557884216309},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8239201307296753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.796978771686554},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.46248918771743774},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.454396516084671},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.4459373950958252},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.43427830934524536},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.42725181579589844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37702444195747375},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3291996717453003},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2009-636","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2009-636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2009","raw_type":"proceedings-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50081620","is_oa":true,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100590172","pdf_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100590172","source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":{"id":"pmh:oai:t2r2.star.titech.ac.jp:50081620","is_oa":true,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100590172","pdf_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100590172","source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5699999928474426}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W191108438.pdf"},"referenced_works_count":6,"referenced_works":["https://openalex.org/W1563939609","https://openalex.org/W2005522781","https://openalex.org/W2105613081","https://openalex.org/W2149053750","https://openalex.org/W2172203831","https://openalex.org/W3034729383"],"related_works":["https://openalex.org/W4255044973","https://openalex.org/W2499802997","https://openalex.org/W2112059504","https://openalex.org/W2727729836","https://openalex.org/W1960256358","https://openalex.org/W2548037963","https://openalex.org/W2464377622","https://openalex.org/W642007152","https://openalex.org/W2341426843","https://openalex.org/W2131711534"],"abstract_inverted_index":{"In":[0],"a":[1,5,11],"speech":[2,35,101],"recognition":[3,77],"system":[4],"Voice":[6],"Activity":[7],"Detector":[8],"(VAD)":[9],"is":[10],"crucial":[12],"component":[13],"for":[14,21],"not":[15],"only":[16],"maintaining":[17],"accuracy":[18,88],"but":[19],"also":[20],"reducing":[22],"computational":[23,92],"consumption.":[24],"Front-end":[25],"approaches":[26,52],"which":[27],"drop":[28],"non-speech":[29],"frames":[30,36],"typically":[31],"attempt":[32],"to":[33,71],"detect":[34],"by":[37,94],"utilizing":[38],"speech/non-speech":[39,55,69],"classification":[40,56],"information":[41,57,70],"such":[42],"as":[43],"the":[44,54,68,73,76,87,96],"zero":[45],"crossing":[46],"rate":[47],"or":[48],"statistical":[49],"models.":[50],"These":[51],"discard":[53],"after":[58],"voice":[59,103],"detection.":[60],"This":[61],"paper":[62],"proposes":[63],"an":[64],"approach":[65,84],"that":[66,82],"uses":[67],"adjust":[72],"score":[74],"of":[75],"hypotheses.":[78],"Experimental":[79],"results":[80],"show":[81],"our":[83],"can":[85],"improve":[86],"significantly":[89],"and":[90],"reduce":[91],"consumption":[93],"combining":[95],"frontend":[97],"method.":[98],"Index":[99],"Terms:":[100],"recognition,":[102],"activity":[104],"detection,":[105],"decoder":[106]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
