{"id":"https://openalex.org/W4406937735","doi":"https://doi.org/10.48550/arxiv.2501.16344","title":"WhiSPA: Semantically and Psychologically Aligned Whisper with Self-Supervised Contrastive and Student-Teacher Learning","display_name":"WhiSPA: Semantically and Psychologically Aligned Whisper with Self-Supervised Contrastive and Student-Teacher Learning","publication_year":2025,"publication_date":"2025-01-15","ids":{"openalex":"https://openalex.org/W4406937735","doi":"https://doi.org/10.48550/arxiv.2501.16344"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2501.16344","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.16344","pdf_url":"https://arxiv.org/pdf/2501.16344","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2501.16344","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102507206","display_name":"R. Rajeswara Rao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rao, Rajath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055755559","display_name":"Adithya V Ganesan","orcid":"https://orcid.org/0000-0001-6179-624X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ganesan, Adithya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073392421","display_name":"Oscar Kjell","orcid":"https://orcid.org/0000-0002-2728-6278"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kjell, Oscar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078562814","display_name":"Jonah Luby","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luby, Jonah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045590289","display_name":"Akshay Raghavan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghavan, Akshay","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092090123","display_name":"Scott Feltman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feltman, Scott","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017386883","display_name":"Whitney R. Ringwald","orcid":"https://orcid.org/0000-0002-8883-5963"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ringwald, Whitney","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053433716","display_name":"Ryan L. Boyd","orcid":"https://orcid.org/0000-0002-1876-6050"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boyd, Ryan L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083660592","display_name":"Benjamin J. Luft","orcid":"https://orcid.org/0000-0001-9008-7004"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luft, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057125626","display_name":"Camilo J. Ruggero","orcid":"https://orcid.org/0000-0002-8482-6269"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruggero, Camilo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088320958","display_name":"Neville Ryant","orcid":"https://orcid.org/0000-0003-2545-6912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ryant, Neville","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074056978","display_name":"Roman Kotov","orcid":"https://orcid.org/0000-0001-9569-8381"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kotov, Roman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5046253607","display_name":"H. Andrew Schwartz","orcid":"https://orcid.org/0000-0002-6383-3339"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwartz, H. Andrew","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5102507206"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11058","display_name":"Ethics in Business and Education","score":0.7986999750137329,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11058","display_name":"Ethics in Business and Education","score":0.7986999750137329,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12755","display_name":"Legal Education and Practice Innovations","score":0.7167999744415283,"subfield":{"id":"https://openalex.org/subfields/3308","display_name":"Law"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.44328197836875916},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.425091952085495},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.3940192461013794},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3472793698310852},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3433466851711273}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.44328197836875916},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.425091952085495},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.3940192461013794},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3472793698310852},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3433466851711273}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2501.16344","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.16344","pdf_url":"https://arxiv.org/pdf/2501.16344","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2501.16344","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2501.16344","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2501.16344","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.16344","pdf_url":"https://arxiv.org/pdf/2501.16344","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1558286349","display_name":null,"funder_award_id":"CDC/NIOSH","funder_id":"https://openalex.org/F4320337382","funder_display_name":"National Institute for Occupational Safety and Health"},{"id":"https://openalex.org/G4410254120","display_name":null,"funder_award_id":"R01 AA028032","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G6213191006","display_name":null,"funder_award_id":"U01 OH012476","funder_id":"https://openalex.org/F4320337382","funder_display_name":"National Institute for Occupational Safety and Health"},{"id":"https://openalex.org/G6494207789","display_name":null,"funder_award_id":"R01 AA028032","funder_id":"https://openalex.org/F4320337330","funder_display_name":"National Institute on Alcohol Abuse and Alcoholism"},{"id":"https://openalex.org/G7422516467","display_name":null,"funder_award_id":"NIAAA","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332162","display_name":"Centers for Disease Control and Prevention","ror":"https://ror.org/042twtr12"},{"id":"https://openalex.org/F4320337330","display_name":"National Institute on Alcohol Abuse and Alcoholism","ror":"https://ror.org/02jzrsm59"},{"id":"https://openalex.org/F4320337382","display_name":"National Institute for Occupational Safety and Health","ror":"https://ror.org/0502a2655"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4406937735.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Current":[0],"speech":[1,77,123],"encoding":[2],"pipelines":[3],"often":[4,23],"rely":[5],"on":[6,149],"an":[7,31,38,126],"additional":[8],"text-based":[9],"LM":[10,26,36,148],"to":[11,33,143,154],"get":[12,155],"robust":[13],"representations":[14,95],"of":[15,88,105,130,160],"human":[16,161],"communication,":[17],"even":[18],"though":[19],"SotA":[20],"speech-to-text":[21,150],"models":[22],"have":[24],"a":[25,59,67,72,97,145,156],"within.":[27],"This":[28],"work":[29],"proposes":[30],"approach":[32],"improve":[34],"the":[35,43,86],"within":[37],"audio":[39,61,82],"model":[40,69],"such":[41],"that":[42,137],"subsequent":[44,146],"text-LM":[45],"is":[46,139],"unnecessary.":[47],"We":[48],"introduce":[49],"WhiSPA":[50,120,135],"(Whisper":[51],"with":[52,66,93],"Semantic":[53],"and":[54,101,110,116,132],"Psychological":[55],"Alignment),":[56],"which":[57],"leverages":[58],"novel":[60],"training":[62],"objective:":[63],"contrastive":[64],"loss":[65],"language":[68],"embedding":[70],"as":[71],"teacher.":[73],"Using":[74],"over":[75],"500k":[76],"segments":[78],"from":[79,96],"mental":[80],"health":[81],"interviews,":[83],"we":[84],"evaluate":[85],"utility":[87],"aligning":[89],"Whisper's":[90],"latent":[91],"space":[92],"semantic":[94],"text":[98,147],"autoencoder":[99],"(SBERT)":[100],"lexically":[102],"derived":[103],"embeddings":[104],"basic":[106],"psychological":[107,118,158],"dimensions:":[108],"emotion":[109],"personality.":[111],"Over":[112],"self-supervised":[113],"affective":[114],"tasks":[115],"downstream":[117],"tasks,":[119],"surpasses":[121],"current":[122],"encoders,":[124],"achieving":[125],"average":[127],"error":[128],"reduction":[129],"73.4%":[131],"83.8%,":[133],"respectively.":[134],"demonstrates":[136],"it":[138],"not":[140],"always":[141],"necessary":[142],"run":[144],"output":[151],"in":[152],"order":[153],"rich":[157],"representation":[159],"communication.":[162]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
