{"id":"https://openalex.org/W7148330821","doi":"https://doi.org/10.1109/asru65441.2025.11433842","title":"Selection of Layers from Self-supervised Learning Models for Predicting Mean-Opinion-Score of Speech","display_name":"Selection of Layers from Self-supervised Learning Models for Predicting Mean-Opinion-Score of Speech","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148330821","doi":"https://doi.org/10.1109/asru65441.2025.11433842"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11433842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11433842","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051156480","display_name":"Xinyu Liang","orcid":"https://orcid.org/0000-0003-1314-9042"},"institutions":[{"id":"https://openalex.org/I4210164304","display_name":"Mabtech (Sweden)","ror":"https://ror.org/057s3rs39","country_code":"SE","type":"company","lineage":["https://openalex.org/I4210164304"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Xinyu Liang","raw_affiliation_strings":["HCLTech AB,Sweden"],"affiliations":[{"raw_affiliation_string":"HCLTech AB,Sweden","institution_ids":["https://openalex.org/I4210164304"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092639592","display_name":"Fredrik Cumlin","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Fredrik Cumlin","raw_affiliation_strings":["KTH Royal Institute of Technology,School of Electrical Engineering and Computer Science,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,School of Electrical Engineering and Computer Science,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132815570","display_name":"Victor Ungureanu","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Victor Ungureanu","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001022750","display_name":"Chandan K. Reddy","orcid":"https://orcid.org/0000-0003-2839-3662"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chandan K. A. Reddy","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014795917","display_name":"Christian Sch\u00fcldt","orcid":"https://orcid.org/0000-0003-3439-0468"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christian Sch\u00fcldt","raw_affiliation_strings":["Google LLC"],"affiliations":[{"raw_affiliation_string":"Google LLC","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101692633","display_name":"Saikat Chatterjee","orcid":"https://orcid.org/0000-0003-2638-6047"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Saikat Chatterjee","raw_affiliation_strings":["KTH Royal Institute of Technology,School of Electrical Engineering and Computer Science,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,School of Electrical Engineering and Computer Science,Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5051156480"],"corresponding_institution_ids":["https://openalex.org/I4210164304"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.8755358,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5289000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5289000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.11249999701976776,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.06840000301599503,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6725999712944031},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5066999793052673},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4796000123023987},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.4742000102996826},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.4733000099658966},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.46059998869895935},{"id":"https://openalex.org/keywords/multiple-models","display_name":"Multiple Models","score":0.40470001101493835},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39559999108314514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7245000004768372},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6725999712944031},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5845999717712402},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5066999793052673},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49869999289512634},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.4742000102996826},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.4733000099658966},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.46059998869895935},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4122999906539917},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.40470001101493835},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39559999108314514},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11433842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11433842","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1849277567","https://openalex.org/W2075843680","https://openalex.org/W2962780374","https://openalex.org/W2972394484","https://openalex.org/W3016160783","https://openalex.org/W3161480375","https://openalex.org/W3162133897","https://openalex.org/W3171304840","https://openalex.org/W3196475561","https://openalex.org/W3197580070","https://openalex.org/W3198275944","https://openalex.org/W3202278141","https://openalex.org/W3206189675","https://openalex.org/W3207932315","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4221144124","https://openalex.org/W4225956675","https://openalex.org/W4226380987","https://openalex.org/W4292402161","https://openalex.org/W4296068974","https://openalex.org/W4297841508","https://openalex.org/W4375869259","https://openalex.org/W4385822799","https://openalex.org/W4385823338","https://openalex.org/W4391021629","https://openalex.org/W4402112521","https://openalex.org/W4404577625","https://openalex.org/W4415433951"],"related_works":[],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1],"(SSL)":[2],"models":[3,18,44,65],"like":[4],"Wav2Vec2,":[5],"HuBERT,":[6],"and":[7,38,105,121],"WavLM":[8],"have":[9],"been":[10],"widely":[11],"used":[12],"in":[13],"speech":[14,40],"processing.":[15],"These":[16,110],"transformer-based":[17],"consist":[19],"of":[20,27,62,115],"multiple":[21,63],"layers,":[22],"each":[23,72],"capturing":[24],"different":[25,60],"levels":[26],"representation.":[28],"While":[29],"prior":[30],"studies":[31],"explored":[32],"their":[33],"layer-wise":[34],"representations":[35],"for":[36,66],"efficiency":[37],"performance,":[39],"quality":[41],"assessment":[42],"(SQA)":[43],"predominantly":[45],"rely":[46],"on":[47],"last-layer":[48],"features,":[49],"leaving":[50],"intermediate":[51],"layers":[52,61],"underexamined.":[53],"In":[54],"this":[55],"work,":[56],"we":[57],"systematically":[58],"evaluate":[59],"SSL":[64],"predicting":[67],"mean-opinion-score":[68],"(MOS).":[69],"Features":[70],"from":[71,94],"layer":[73],"are":[74],"fed":[75],"into":[76],"a":[77],"lightweight":[78],"regression":[79],"network":[80],"to":[81,99],"assess":[82],"effectiveness.":[83],"Our":[84],"experiments":[85],"consistently":[86],"show":[87],"early-layers":[88],"features":[89],"outperform":[90],"or":[91],"match":[92],"those":[93],"the":[95,113],"last":[96],"layer,":[97],"leading":[98],"significant":[100],"improvements":[101],"over":[102],"conventional":[103],"approaches":[104],"state-of-the-art":[106],"MOS":[107],"prediction":[108],"models.":[109],"findings":[111],"highlight":[112],"advantages":[114],"early-layer":[116],"selection,":[117],"offering":[118],"enhanced":[119],"performance":[120],"reduced":[122],"system":[123],"complexity.":[124]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
