{"id":"https://openalex.org/W4401609323","doi":"https://doi.org/10.1109/icasspw62465.2024.10626416","title":"Analysis of Self-Supervised Speech Models on Children\u2019s Speech and Infant Vocalizations","display_name":"Analysis of Self-Supervised Speech Models on Children\u2019s Speech and Infant Vocalizations","publication_year":2024,"publication_date":"2024-04-14","ids":{"openalex":"https://openalex.org/W4401609323","doi":"https://doi.org/10.1109/icasspw62465.2024.10626416","pmid":"https://pubmed.ncbi.nlm.nih.gov/40454253"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw62465.2024.10626416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw62465.2024.10626416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12126097/pdf/nihms-2074948.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100456945","display_name":"Jialu Li","orcid":"https://orcid.org/0000-0003-0092-8071"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jialu Li","raw_affiliation_strings":["University of Illinois,Department of Electrical and Computer Engineering"],"affiliations":[{"raw_affiliation_string":"University of Illinois,Department of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004778663","display_name":"Mark Hasegawa\u2010Johnson","orcid":"https://orcid.org/0000-0002-5631-2893"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Hasegawa-Johnson","raw_affiliation_strings":["University of Illinois,Department of Electrical and Computer Engineering"],"affiliations":[{"raw_affiliation_string":"University of Illinois,Department of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040993718","display_name":"Nancy L. McElwain","orcid":"https://orcid.org/0000-0001-9586-5020"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nancy L. McElwain","raw_affiliation_strings":["University of Illinois,Beckman Institute for Advanced Science and Technology"],"affiliations":[{"raw_affiliation_string":"University of Illinois,Beckman Institute for Advanced Science and Technology","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100456945"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":1.4504,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84612593,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"2024","issue":null,"first_page":"550","last_page":"554"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5837480425834656},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5685805082321167},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3211626410484314}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5837480425834656},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5685805082321167},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3211626410484314}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icasspw62465.2024.10626416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw62465.2024.10626416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},{"id":"pmid:40454253","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40454253","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"... IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:12126097","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12126097","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12126097/pdf/nihms-2074948.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Int Conf Acoust Speech Signal Proc Workshops","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:12126097","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12126097","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12126097/pdf/nihms-2074948.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Int Conf Acoust Speech Signal Proc Workshops","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Gender equality","id":"https://metadata.un.org/sdg/5","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4401609323.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W146900863","https://openalex.org/W1494198834","https://openalex.org/W1583837637","https://openalex.org/W1586176709","https://openalex.org/W2026189271","https://openalex.org/W2085662862","https://openalex.org/W2107162140","https://openalex.org/W2124812654","https://openalex.org/W2125814841","https://openalex.org/W2127141656","https://openalex.org/W2164181499","https://openalex.org/W2187089797","https://openalex.org/W2187824139","https://openalex.org/W2239141610","https://openalex.org/W2747874407","https://openalex.org/W2888744627","https://openalex.org/W2889212027","https://openalex.org/W3167533889","https://openalex.org/W3209059054","https://openalex.org/W4226291682","https://openalex.org/W4226380987","https://openalex.org/W4283073456","https://openalex.org/W4293793697","https://openalex.org/W4319862479","https://openalex.org/W4319862652","https://openalex.org/W4382201918","https://openalex.org/W4385478409","https://openalex.org/W4385807395","https://openalex.org/W4385822277","https://openalex.org/W4385822468","https://openalex.org/W4385822936","https://openalex.org/W4385823034","https://openalex.org/W4385823368","https://openalex.org/W4387183160","https://openalex.org/W6631216910","https://openalex.org/W6752726010","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"To":[0],"understand":[1],"why":[2],"self-supervised":[3],"learning":[4],"(SSL)":[5],"models":[6,44,62,113,143],"have":[7,19],"empirically":[8],"achieved":[9],"strong":[10],"performances":[11],"on":[12,21,63,72,145],"several":[13],"speech-processing":[14],"downstream":[15,66],"tasks,":[16],"numerous":[17],"studies":[18],"focused":[20],"analyzing":[22],"the":[23,27,73,108,134,160],"encoded":[24],"information":[25],"of":[26,75,110,136,162],"SSL":[28,43,61,112,142],"layer":[29],"representations":[30,153],"in":[31],"adult":[32],"speech.":[33],"Limited":[34],"work":[35],"has":[36],"investigated":[37],"how":[38],"pre-training":[39],"and":[40,48,82,88,96,128,157],"fine-tuning":[41],"affect":[42],"encoding":[45],"children's":[46,106,126],"speech":[47,74,127,135],"vocalizations.":[49],"In":[50],"this":[51,57,163],"study,":[52],"we":[53],"aim":[54],"to":[55,117,120,133,150],"bridge":[56],"gap":[58],"by":[59],"probing":[60],"two":[64],"relevant":[65],"tasks:":[67],"(1)":[68],"phoneme":[69],"recognition":[70],"(PR)":[71],"adults,":[76],"older":[77,125],"children":[78,84],"(8-10":[79],"years":[80,86],"old),":[81,87],"younger":[83,105,137],"(1-4":[85],"(2)":[89],"vocalization":[90],"classification":[91],"(VC)":[92],"distinguishing":[93],"cry,":[94],"fuss,":[95],"babble":[97],"for":[98],"infants":[99],"under":[100],"14":[101],"months":[102],"old.":[103],"For":[104,139],"PR,":[107],"superiority":[109],"fine-tuned":[111],"is":[114],"largely":[115],"due":[116],"their":[118],"ability":[119],"learn":[121,149],"features":[122,132],"that":[123],"represent":[124],"then":[129],"adapt":[130],"those":[131],"children.":[138],"infant":[140],"VC,":[141],"pre-trained":[144],"large-scale":[146],"home":[147],"recordings":[148],"leverage":[151],"phonetic":[152],"at":[154],"middle":[155],"layers,":[156],"thereby":[158],"enhance":[159],"performance":[161],"task.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
