{"id":"https://openalex.org/W4392904445","doi":"https://doi.org/10.1109/icassp48485.2024.10447428","title":"Automatic Speech Recognition Tuned for Child Speech in the Classroom","display_name":"Automatic Speech Recognition Tuned for Child Speech in the Classroom","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904445","doi":"https://doi.org/10.1109/icassp48485.2024.10447428"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447428","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058403517","display_name":"Rosy Southwell","orcid":"https://orcid.org/0000-0003-4141-523X"},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Rosy Southwell","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038893146","display_name":"W. Dixon Ward","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Wayne Ward","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085746637","display_name":"Viet Anh Trinh","orcid":"https://orcid.org/0000-0002-1660-6627"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viet Anh Trinh","raw_affiliation_strings":["Worcester Polytechnic Institute"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071559009","display_name":"Charis Clevenger","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Charis Clevenger","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094178996","display_name":"Clay Clevenger","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Clay Clevenger","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094178997","display_name":"Emily Watts","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emily Watts","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027141730","display_name":"Jason G. Reitman","orcid":"https://orcid.org/0000-0003-4552-5874"},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jason Reitman","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008316506","display_name":"Sidney K. D\u2019Mello","orcid":"https://orcid.org/0000-0003-0347-2807"},"institutions":[{"id":"https://openalex.org/I4210153173","display_name":"Institut des Sciences Cognitives","ror":"https://ror.org/058hz8544","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I4210139971","https://openalex.org/I4210153173","https://openalex.org/I4210158834"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sidney D\u2019Mello","raw_affiliation_strings":["University of Colorado,Institute of Cognitive Science","Institute of Cognitive Science, University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado,Institute of Cognitive Science","institution_ids":["https://openalex.org/I4210153173"]},{"raw_affiliation_string":"Institute of Cognitive Science, University of Colorado","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027788582","display_name":"Jacob Whitehill","orcid":"https://orcid.org/0000-0002-5851-312X"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacob Whitehill","raw_affiliation_strings":["Worcester Polytechnic Institute"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute","institution_ids":["https://openalex.org/I107077323"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5058403517"],"corresponding_institution_ids":["https://openalex.org/I4210153173"],"apc_list":null,"apc_paid":null,"fwci":4.0569,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.94287767,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"12291","last_page":"12295"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7359230518341064},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.7346327304840088},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7074761390686035},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6348497867584229},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.54688560962677},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.48539531230926514},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.46242019534111023},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.42697620391845703},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.423850417137146},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37009555101394653},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.36244192719459534},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33415114879608154},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.26620563864707947},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11720001697540283}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7359230518341064},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.7346327304840088},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7074761390686035},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6348497867584229},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.54688560962677},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.48539531230926514},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.46242019534111023},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.42697620391845703},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.423850417137146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37009555101394653},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.36244192719459534},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33415114879608154},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.26620563864707947},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11720001697540283},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447428","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Gender equality","id":"https://metadata.un.org/sdg/5","score":0.4099999964237213}],"awards":[{"id":"https://openalex.org/G5283821830","display_name":null,"funder_award_id":"2019805","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8202432720","display_name":"CAREER: Developing New Scientific Instruments for Classroom Observation: A Multi-modal Machine Learning Approach","funder_award_id":"2046505","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8498637117","display_name":null,"funder_award_id":"DRL 2019805","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W121610373","https://openalex.org/W1494198834","https://openalex.org/W2094954292","https://openalex.org/W2096653978","https://openalex.org/W2124812654","https://openalex.org/W2145276111","https://openalex.org/W2295983515","https://openalex.org/W2511419867","https://openalex.org/W2936774411","https://openalex.org/W2979826702","https://openalex.org/W3041179138","https://openalex.org/W3167533889","https://openalex.org/W3168867926","https://openalex.org/W3199511834","https://openalex.org/W4210300569","https://openalex.org/W4210482569","https://openalex.org/W4226219992","https://openalex.org/W4287855004","https://openalex.org/W4292119927","https://openalex.org/W4311121803","https://openalex.org/W4381163923","https://openalex.org/W4384918448","https://openalex.org/W4385822587","https://openalex.org/W4404518524","https://openalex.org/W6770506093","https://openalex.org/W6796581206","https://openalex.org/W6839451629","https://openalex.org/W6842258392","https://openalex.org/W6847363464","https://openalex.org/W6854866820","https://openalex.org/W6855897889"],"related_works":["https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W4221142855","https://openalex.org/W2050138804","https://openalex.org/W4290708361","https://openalex.org/W2129812225","https://openalex.org/W26527944","https://openalex.org/W2784059283","https://openalex.org/W2291624303","https://openalex.org/W2105439218"],"abstract_inverted_index":{"K-12":[0],"school":[1],"classrooms":[2],"have":[3],"proven":[4],"to":[5,18,48,92,113,139,154],"be":[6],"a":[7,83,109,118,128,136,150,170],"challenging":[8,120],"environment":[9],"for":[10,51],"Automatic":[11],"Speech":[12],"Recognition":[13],"(ASR)":[14],"systems,":[15],"both":[16,166],"due":[17],"background":[19,74],"noise":[20],"and":[21,23,27,42,59,68,108,169],"conversation,":[22],"differences":[24],"in":[25,54,87],"linguistic":[26],"acoustic":[28],"properties":[29],"from":[30],"adult":[31,67],"speech,":[32],"on":[33,46,63,94,117,165],"which":[34],"the":[35,55,95,103,160],"majority":[36],"of":[37,66,99,143,162],"ASR":[38,50],"systems":[39],"are":[40],"trained":[41],"evaluated.":[43],"We":[44,125,158],"report":[45],"experiments":[47],"improve":[49],"child":[52,69,100],"speech":[53,70,101,122,172],"classroom":[56,73,121,171],"by":[57],"training":[58],"fine-tuning":[60],"transformer":[61],"models":[62],"public":[64,96],"corpora":[65],"augmented":[71],"with":[72],"noise.":[75],"By":[76],"tuning":[77],"OpenAI\u2019s":[78],"Whisper":[79],"model":[80],"we":[81],"achieve":[82],"38%":[84],"relative":[85,111],"reduction":[86,112],"word":[88],"error":[89],"rate":[90],"(WER)":[91],"9.2%":[93],"MyST":[97],"dataset":[98,123],"\u2013":[102,107],"lowest":[104],"yet":[105],"reported":[106],"7%":[110],"reach":[114],"54%":[115],"WER":[116],"more":[119],"(ISAT).":[124],"also":[126],"introduce":[127],"novel":[129],"beam":[130],"hypothesis":[131],"rescoring":[132],"method":[133],"that":[134],"incorporates":[135],"speed-aware":[137],"term":[138],"capture":[140],"prior":[141],"knowledge":[142],"human":[144],"speaking":[145],"rates,":[146],"as":[147,149],"well":[148],"Large":[151],"Language":[152],"Model,":[153],"select":[155],"among":[156],"hypotheses.":[157],"demonstrate":[159],"effectiveness":[161],"this":[163],"technique":[164],"publicly-available":[167],"datasets":[168],"dataset.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
