{"id":"https://openalex.org/W4296068413","doi":"https://doi.org/10.21437/interspeech.2022-118","title":"TRILLsson: Distilled Universal Paralinguistic Speech Representations","display_name":"TRILLsson: Distilled Universal Paralinguistic Speech Representations","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4296068413","doi":"https://doi.org/10.21437/interspeech.2022-118"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-118","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-118","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076659859","display_name":"Joel Shor","orcid":"https://orcid.org/0000-0002-6729-5988"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Joel Shor","raw_affiliation_strings":["Verily Life Sciences 1 , Google Research 2"],"affiliations":[{"raw_affiliation_string":"Verily Life Sciences 1 , Google Research 2","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055731751","display_name":"Subhashini Venugopalan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subhashini Venugopalan","raw_affiliation_strings":["Verily Life Sciences 1 , Google Research 2"],"affiliations":[{"raw_affiliation_string":"Verily Life Sciences 1 , Google Research 2","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5076659859"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":3.3507,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.93750994,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"356","last_page":"360"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paralanguage","display_name":"Paralanguage","score":0.8999726176261902},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7697222232818604},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6852239966392517},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5813624858856201},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.491190642118454},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4216618239879608},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3823513388633728},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.365306556224823},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35748496651649475},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.07448956370353699}],"concepts":[{"id":"https://openalex.org/C133378560","wikidata":"https://www.wikidata.org/wiki/Q1753225","display_name":"Paralanguage","level":2,"score":0.8999726176261902},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7697222232818604},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6852239966392517},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5813624858856201},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.491190642118454},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4216618239879608},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3823513388633728},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.365306556224823},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35748496651649475},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.07448956370353699},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-118","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-118","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1821462560","https://openalex.org/W2030931454","https://openalex.org/W2146334809","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2726515241","https://openalex.org/W2797583228","https://openalex.org/W2936802426","https://openalex.org/W2979826702","https://openalex.org/W2995181338","https://openalex.org/W3005680577","https://openalex.org/W3006926732","https://openalex.org/W3036601975","https://openalex.org/W3097777922","https://openalex.org/W3145444543","https://openalex.org/W3159481202","https://openalex.org/W3163571828","https://openalex.org/W3169320628","https://openalex.org/W3196876847","https://openalex.org/W3196974791","https://openalex.org/W3197150384","https://openalex.org/W3198239978","https://openalex.org/W3204696009","https://openalex.org/W3206495532","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2064370490","https://openalex.org/W3166813893","https://openalex.org/W2910013580","https://openalex.org/W2391900574","https://openalex.org/W3200958703","https://openalex.org/W147964346","https://openalex.org/W1990078780","https://openalex.org/W2778981579","https://openalex.org/W2376619307","https://openalex.org/W4250647969"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,124],"self-supervision":[3],"have":[4],"dramatically":[5],"improved":[6],"the":[7,80,93,96,105,117,131,141,157,170],"quality":[8],"of":[9,14,43,95,109,135,149],"speech":[10,45],"representations.":[11],"However,":[12],"deployment":[13],"state-of-the-art":[15,52],"embedding":[16],"models":[17,46,63,78,139],"on":[18,58,66,79,107,115,133,147,162],"devices":[19],"has":[20],"been":[21],"restricted":[22],"due":[23],"to":[24],"their":[25],"limited":[26],"public":[27,67],"availability":[28],"and":[29,50,61,74,112,127,152],"large":[30],"resource":[31],"footprint.":[32],"Our":[33,54,85,138],"work":[34],"addresses":[35],"these":[36],"issues":[37],"by":[38],"publicly":[39],"releasing":[40],"a":[41],"collection":[42],"paralinguistic":[44],"that":[47],"are":[48,64],"small":[49],"near":[51],"performance.":[53],"approach":[55],"is":[56,89,113,122],"based":[57],"knowledge":[59],"distillation,":[60],"our":[62,77,153],"distilled":[65,87],"data":[68],"only.":[69],"We":[70],"explore":[71],"different":[72],"architectures":[73],"thoroughly":[75],"evaluate":[76],"Non-Semantic":[81],"Speech":[82],"(NOSS)":[83],"benchmark.":[84],"largest":[86],"model":[88,98,121,146,155],"less":[90],"than":[91],"15%":[92],"size":[94,125],"original":[97],"(314MB":[99],"vs":[100],"2.2GB),":[101],"achieves":[102,128],"over":[103,129],"96%":[104],"accuracy":[106,132],"6":[108,134,148],"7":[110,136,150],"tasks,":[111,151],"trained":[114],"6.5%":[116],"data.":[118],"The":[119],"smallest":[120,154],"1%":[123],"(22MB)":[126],"90%":[130],"tasks.":[137],"outperform":[140],"open":[142,158],"source":[143,159],"Wav2Vec":[144,160],"2.0":[145,161],"outperforms":[156],"both":[163],"emotion":[164],"recognition":[165],"tasks":[166],"despite":[167],"being":[168],"7%":[169],"size.":[171]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
