{"id":"https://openalex.org/W3206495532","doi":"https://doi.org/10.1109/icassp43922.2022.9747197","title":"Universal Paralinguistic Speech Representations Using self-Supervised Conformers","display_name":"Universal Paralinguistic Speech Representations Using self-Supervised Conformers","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3206495532","doi":"https://doi.org/10.1109/icassp43922.2022.9747197","mag":"3206495532"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747197","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747197","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.04621","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076659859","display_name":"Joel Shor","orcid":"https://orcid.org/0000-0002-6729-5988"},"institutions":[{"id":"https://openalex.org/I1302485747","display_name":"Verizon (United States)","ror":"https://ror.org/02vdyxx64","country_code":"US","type":"company","lineage":["https://openalex.org/I1302485747"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Joel Shor","raw_affiliation_strings":["Verily Life Sciences,Boston,USA","Verily Life Sciences, Boston, USA"],"affiliations":[{"raw_affiliation_string":"Verily Life Sciences,Boston,USA","institution_ids":["https://openalex.org/I1302485747"]},{"raw_affiliation_string":"Verily Life Sciences, Boston, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103622427","display_name":"Aren Jansen","orcid":null},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aren Jansen","raw_affiliation_strings":["Mountain View,California,USA","Mountain View, California, USA"],"affiliations":[{"raw_affiliation_string":"Mountain View,California,USA","institution_ids":["https://openalex.org/I2800240351"]},{"raw_affiliation_string":"Mountain View, California, USA","institution_ids":["https://openalex.org/I2800240351"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100750907","display_name":"Wei Han","orcid":"https://orcid.org/0000-0002-4201-9645"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Han","raw_affiliation_strings":["Mountain View,California,USA","Mountain View, California, USA"],"affiliations":[{"raw_affiliation_string":"Mountain View,California,USA","institution_ids":["https://openalex.org/I2800240351"]},{"raw_affiliation_string":"Mountain View, California, USA","institution_ids":["https://openalex.org/I2800240351"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100613715","display_name":"Daniel Park","orcid":"https://orcid.org/0000-0002-1919-0460"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Park","raw_affiliation_strings":["Mountain View,California,USA","Mountain View, California, USA"],"affiliations":[{"raw_affiliation_string":"Mountain View,California,USA","institution_ids":["https://openalex.org/I2800240351"]},{"raw_affiliation_string":"Mountain View, California, USA","institution_ids":["https://openalex.org/I2800240351"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Mountain View,California,USA","Mountain View, California, USA"],"affiliations":[{"raw_affiliation_string":"Mountain View,California,USA","institution_ids":["https://openalex.org/I2800240351"]},{"raw_affiliation_string":"Mountain View, California, USA","institution_ids":["https://openalex.org/I2800240351"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5076659859"],"corresponding_institution_ids":["https://openalex.org/I1302485747"],"apc_list":null,"apc_paid":null,"fwci":3.3507,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.93602587,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3169","last_page":"3173"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paralanguage","display_name":"Paralanguage","score":0.8598334789276123},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.778359055519104},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7311335802078247},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6308392882347107},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.595544695854187},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5940759778022766},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5657017827033997},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5480378866195679},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4984927177429199},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40173226594924927},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3752048909664154},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33015984296798706},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.06231576204299927}],"concepts":[{"id":"https://openalex.org/C133378560","wikidata":"https://www.wikidata.org/wiki/Q1753225","display_name":"Paralanguage","level":2,"score":0.8598334789276123},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.778359055519104},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7311335802078247},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6308392882347107},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.595544695854187},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5940759778022766},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5657017827033997},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5480378866195679},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4984927177429199},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40173226594924927},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3752048909664154},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33015984296798706},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.06231576204299927},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747197","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747197","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2110.04621","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.04621","pdf_url":"https://arxiv.org/pdf/2110.04621","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2110.04621","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.04621","pdf_url":"https://arxiv.org/pdf/2110.04621","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W2030931454","https://openalex.org/W2146334809","https://openalex.org/W2396589722","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2726515241","https://openalex.org/W2767754137","https://openalex.org/W2797583228","https://openalex.org/W2808706139","https://openalex.org/W2842511635","https://openalex.org/W2936802426","https://openalex.org/W2942810103","https://openalex.org/W2955425717","https://openalex.org/W2962760690","https://openalex.org/W2963403868","https://openalex.org/W2963432880","https://openalex.org/W2964525587","https://openalex.org/W2967606780","https://openalex.org/W2972943112","https://openalex.org/W2973157397","https://openalex.org/W2977259558","https://openalex.org/W2982083293","https://openalex.org/W2991572650","https://openalex.org/W2995181338","https://openalex.org/W3006926732","https://openalex.org/W3015817524","https://openalex.org/W3036601975","https://openalex.org/W3093579165","https://openalex.org/W3096216346","https://openalex.org/W3096383643","https://openalex.org/W3097777922","https://openalex.org/W3099782249","https://openalex.org/W3162391496","https://openalex.org/W3163571828","https://openalex.org/W3169320628","https://openalex.org/W3195830874","https://openalex.org/W3196876847","https://openalex.org/W3197150384","https://openalex.org/W3197580070","https://openalex.org/W3198239978","https://openalex.org/W3204696009","https://openalex.org/W3209059054","https://openalex.org/W4287022992","https://openalex.org/W4297808394","https://openalex.org/W4385245566","https://openalex.org/W6712588427","https://openalex.org/W6734260513","https://openalex.org/W6738607494","https://openalex.org/W6739901393","https://openalex.org/W6750665317","https://openalex.org/W6752888775","https://openalex.org/W6761472960","https://openalex.org/W6762718338","https://openalex.org/W6771812881","https://openalex.org/W6780218876","https://openalex.org/W6784614252","https://openalex.org/W6800217721","https://openalex.org/W6802600657"],"related_works":["https://openalex.org/W2064370490","https://openalex.org/W3166813893","https://openalex.org/W2910013580","https://openalex.org/W2391900574","https://openalex.org/W3200958703","https://openalex.org/W1990078780","https://openalex.org/W3108667266","https://openalex.org/W2376619307","https://openalex.org/W3118437876","https://openalex.org/W1527444722"],"abstract_inverted_index":{"Many":[0],"speech":[1,58],"applications":[2],"require":[3],"understanding":[4],"aspects":[5],"beyond":[6],"the":[7,17,97,100,104,116,124],"words":[8],"being":[9],"spoken,":[10],"such":[11],"as":[12],"recognizing":[13],"emotion,":[14],"detecting":[15],"whether":[16],"speaker":[18],"is":[19],"wearing":[20],"a":[21,34,46,54,132],"mask,":[22],"or":[23],"distinguishing":[24],"real":[25],"from":[26,40],"synthetic":[27],"speech.":[28],"In":[29],"this":[30],"work,":[31],"we":[32],"introduce":[33],"new":[35],"state-of-the-art":[36],"paralinguistic":[37],"representation":[38,72,135],"derived":[39],"large-scale,":[41],"fully":[42],"self-supervised":[43],"training":[44],"of":[45,57,69,86,99,111],"600M+":[47],"parameter":[48],"Conformer-based":[49],"architecture.":[50],"We":[51],"benchmark":[52],"on":[53,67,108,141],"diverse":[55],"set":[56],"tasks":[59],"and":[60],"demonstrate":[61,89],"that":[62,102],"simple":[63],"linear":[64],"classifiers":[65],"trained":[66],"top":[68],"our":[70],"time-averaged":[71],"outperform":[73],"nearly":[74],"all":[75,142],"previous":[76],"results,":[77],"in":[78,123],"some":[79],"cases":[80],"by":[81],"large":[82],"margins.":[83],"Our":[84],"analyses":[85],"context-window":[87],"size":[88],"that,":[90],"surprisingly,":[91],"2":[92],"second":[93],"context-windows":[94],"achieve":[95],"96%":[96],"performance":[98,127,140],"Conformers":[101],"use":[103],"full":[105],"long-term":[106],"context":[107],"7":[109],"out":[110],"9":[112],"tasks.":[113,143],"Furthermore,":[114],"while":[115],"best":[117],"per-task":[118],"representations":[119],"are":[120],"extracted":[121],"internally":[122],"network,":[125],"stable":[126],"across":[128],"several":[129],"layers":[130],"allows":[131],"single":[133],"universal":[134],"to":[136],"reach":[137],"near":[138],"optimal":[139]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":9}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
