{"id":"https://openalex.org/W4392903993","doi":"https://doi.org/10.1109/icassp48485.2024.10446544","title":"Quantifying The Effect Of Simulator-Based Data Augmentation For Speech Recognition On Augmented Reality Glasses","display_name":"Quantifying The Effect Of Simulator-Based Data Augmentation For Speech Recognition On Augmented Reality Glasses","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903993","doi":"https://doi.org/10.1109/icassp48485.2024.10446544"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446544","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446544","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044514558","display_name":"Riku Arakawa","orcid":"https://orcid.org/0000-0001-7868-4754"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Riku Arakawa","raw_affiliation_strings":["Carnegie Mellon University,Pittsburgh,PA,USA","Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071151080","display_name":"Mathieu Parvaix","orcid":"https://orcid.org/0009-0009-4544-8304"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mathieu Parvaix","raw_affiliation_strings":["Google Research,Mountain View,CA,USA","Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research,Mountain View,CA,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022048907","display_name":"Chiong Ching Lai","orcid":"https://orcid.org/0009-0005-0302-1602"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chiong Lai","raw_affiliation_strings":["Google Research,Mountain View,CA,USA","Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research,Mountain View,CA,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065994318","display_name":"Hakan Erdo\u011fan","orcid":"https://orcid.org/0000-0003-3140-8642"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hakan Erdogan","raw_affiliation_strings":["Google Research,Mountain View,CA,USA","Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research,Mountain View,CA,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016503150","display_name":"Alex Olwal","orcid":"https://orcid.org/0000-0001-7772-0530"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Olwal","raw_affiliation_strings":["Google Research,Mountain View,CA,USA","Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research,Mountain View,CA,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044514558"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.5001,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.80704572,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"726","last_page":"730"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.820432186126709},{"id":"https://openalex.org/keywords/augmented-reality","display_name":"Augmented reality","score":0.7134333252906799},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6923729181289673},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5913847088813782},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.49196478724479675},{"id":"https://openalex.org/keywords/virtual-reality","display_name":"Virtual reality","score":0.45233723521232605},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4136163890361786},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3603588938713074},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3195589780807495},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.10325419902801514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.820432186126709},{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.7134333252906799},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6923729181289673},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5913847088813782},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.49196478724479675},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.45233723521232605},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4136163890361786},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3603588938713074},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3195589780807495},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.10325419902801514},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446544","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446544","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1513603309","https://openalex.org/W2117678320","https://openalex.org/W2555915854","https://openalex.org/W2610523681","https://openalex.org/W2617258110","https://openalex.org/W2696967604","https://openalex.org/W2768862075","https://openalex.org/W2793395175","https://openalex.org/W2963040451","https://openalex.org/W2981900206","https://openalex.org/W2998657200","https://openalex.org/W3093596336","https://openalex.org/W3097471014","https://openalex.org/W3151596526","https://openalex.org/W3160878746","https://openalex.org/W3198454118","https://openalex.org/W3206619923","https://openalex.org/W4205689591","https://openalex.org/W4210439360","https://openalex.org/W4226373941","https://openalex.org/W4307694777","https://openalex.org/W4312701366","https://openalex.org/W4385822950","https://openalex.org/W6630830844","https://openalex.org/W6779079847"],"related_works":["https://openalex.org/W2172197285","https://openalex.org/W4378228679","https://openalex.org/W2736982640","https://openalex.org/W4378228262","https://openalex.org/W3107375852","https://openalex.org/W2752321621","https://openalex.org/W2789244453","https://openalex.org/W2396048001","https://openalex.org/W2905188205","https://openalex.org/W1559044324"],"abstract_inverted_index":{"Augmented":[0],"reality":[1],"(AR)":[2],"glasses":[3],"have":[4],"an":[5,117],"immense":[6],"potential":[7],"for":[8,21,30,80,120],"enhancing":[9],"conversations":[10],"by":[11,142,151],"leveraging":[12,68],"speech":[13,52,127],"recognition":[14,128],"to":[15,23,46,49,72,86],"display":[16],"real-time":[17],"transcription":[18],"or":[19,29],"translation,":[20],"example,":[22],"assist":[24],"people":[25,31],"with":[26,155],"hearing":[27],"impairments":[28],"conversing":[32],"in":[33,39,130,146],"a":[34,69,156],"non-native":[35],"language.":[36],"For":[37],"deployment":[38],"real":[40,160],"environments,":[41],"such":[42,81],"systems,":[43],"however,":[44],"need":[45],"be":[47,139],"able":[48],"separate":[50],"the":[51,65,88,98,111,147,174],"of":[53,67,76,93,113,159],"interest":[54],"from":[55,172],"noise":[56],"and":[57,103,150],"other":[58],"speakers.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63,108,134],"evaluate":[64],"effectiveness":[66],"room":[70,148,168],"simulator":[71],"generate":[73],"large":[74],"amounts":[75],"simulated":[77,104],"training":[78,121],"data":[79,95,115,154],"front-end":[82],"sound":[83],"separation":[84],"models,":[85],"complement":[87],"ideal,":[89],"but":[90],"costly,":[91],"collection":[92],"real-world":[94,131],"recorded":[96,102],"on":[97,182,185],"device.":[99],"Using":[100],"both":[101],"impulse":[105],"responses":[106],"(IRs),":[107],"demonstrate":[109],"that":[110,123,136,166],"use":[112],"simulation":[114],"is":[116],"effective":[118],"method":[119],"models":[122],"can":[124,138],"ultimately":[125],"enhance":[126],"performance":[129,137],"settings.":[132],"Furthermore,":[133],"show":[135],"further":[140],"improved":[141],"adding":[143],"microphone":[144],"directivity":[145],"simulation,":[149],"fusing":[152],"synthetic":[153],"small":[157],"amount":[158],"IRs.":[161],"Our":[162],"results":[163],"also":[164],"suggest":[165],"existing":[167],"simulators":[169],"would":[170],"benefit":[171],"incorporating":[173],"head":[175],"shadow":[176],"effect,":[177],"given":[178],"its":[179],"significant":[180],"impact":[181],"multi-microphone":[183],"recordings":[184],"AR":[186],"glasses.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
