{"id":"https://openalex.org/W4410771218","doi":"https://doi.org/10.1109/icasspw65056.2025.11011178","title":"Prosody Disentanglement with Self-Supervised Speech Representation for Detecting Depression","display_name":"Prosody Disentanglement with Self-Supervised Speech Representation for Detecting Depression","publication_year":2025,"publication_date":"2025-04-06","ids":{"openalex":"https://openalex.org/W4410771218","doi":"https://doi.org/10.1109/icasspw65056.2025.11011178"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw65056.2025.11011178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005410399","display_name":"Bubai Maji","orcid":"https://orcid.org/0000-0002-6995-0317"},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Bubai Maji","raw_affiliation_strings":["IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India"],"affiliations":[{"raw_affiliation_string":"IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073333260","display_name":"Rajlakshmi Guha","orcid":null},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajlakshmi Guha","raw_affiliation_strings":["IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India"],"affiliations":[{"raw_affiliation_string":"IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045754565","display_name":"Aurobinda Routray","orcid":"https://orcid.org/0000-0003-2750-6768"},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aurobinda Routray","raw_affiliation_strings":["IIT Kharagpur,Department of Electrical Engineering,India"],"affiliations":[{"raw_affiliation_string":"IIT Kharagpur,Department of Electrical Engineering,India","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043167162","display_name":"Shazia Nasreen","orcid":"https://orcid.org/0000-0002-8894-2670"},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shazia Nasreen","raw_affiliation_strings":["IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India"],"affiliations":[{"raw_affiliation_string":"IIT Kharagpur,Rekhi Centre of Excellence for the Science of Happiness,India","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111725712","display_name":"Debabrata Majumdar","orcid":null},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Debabrata Majumdar","raw_affiliation_strings":["IIT Kharagpur,B C Roy Technology Hospital,India"],"affiliations":[{"raw_affiliation_string":"IIT Kharagpur,B C Roy Technology Hospital,India","institution_ids":["https://openalex.org/I145894827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5005410399"],"corresponding_institution_ids":["https://openalex.org/I145894827"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05976147,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8715000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8715000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.8589000105857849,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.8662192821502686},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6400487422943115},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6288259625434875},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6171314716339111},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49067628383636475},{"id":"https://openalex.org/keywords/depression","display_name":"Depression (economics)","score":0.43937310576438904},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4182721972465515},{"id":"https://openalex.org/keywords/self-representation","display_name":"Self representation","score":0.411366730928421}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.8662192821502686},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6400487422943115},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6288259625434875},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6171314716339111},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49067628383636475},{"id":"https://openalex.org/C2776867660","wikidata":"https://www.wikidata.org/wiki/Q1814941","display_name":"Depression (economics)","level":2,"score":0.43937310576438904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4182721972465515},{"id":"https://openalex.org/C2988612419","wikidata":"https://www.wikidata.org/wiki/Q7448312","display_name":"Self representation","level":2,"score":0.411366730928421},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C15708023","wikidata":"https://www.wikidata.org/wiki/Q80083","display_name":"Humanities","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icasspw65056.2025.11011178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6600000262260437,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2157741754","https://openalex.org/W2407080277","https://openalex.org/W2530421149","https://openalex.org/W2805409402","https://openalex.org/W2889056793","https://openalex.org/W2896457183","https://openalex.org/W2936774411","https://openalex.org/W2963914279","https://openalex.org/W3015241559","https://openalex.org/W3024869864","https://openalex.org/W3126625480","https://openalex.org/W3204491229","https://openalex.org/W3209059054","https://openalex.org/W4214576234","https://openalex.org/W4225304582","https://openalex.org/W4254718357","https://openalex.org/W4307820296","https://openalex.org/W4372260229","https://openalex.org/W4385823099","https://openalex.org/W4389538363","https://openalex.org/W4400786745","https://openalex.org/W4401446844","https://openalex.org/W4401609030","https://openalex.org/W4401609199","https://openalex.org/W4402111409","https://openalex.org/W4402112095","https://openalex.org/W4402112313","https://openalex.org/W6675409298","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2355553914","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W187117048","https://openalex.org/W4320472397","https://openalex.org/W2401269021","https://openalex.org/W2145654520","https://openalex.org/W2605677994","https://openalex.org/W3120327675","https://openalex.org/W2116310671"],"abstract_inverted_index":{"Speech":[0],"signals":[1],"offer":[2],"valuable":[3],"insights":[4],"into":[5],"mental":[6],"health,":[7],"especially":[8],"in":[9,31],"diagnosing":[10],"depressive":[11,48],"disorders.":[12],"Human":[13],"speech":[14,125,132],"encompasses":[15],"various":[16],"components,":[17],"such":[18],"as":[19],"semantic":[20,83],"content,":[21],"speaker":[22],"identity,":[23],"and":[24,43,78,85,144],"prosodic":[25,33,117],"information.":[26],"A":[27],"key":[28],"challenge":[29],"remains":[30],"disentangling":[32],"information":[34,61],"from":[35],"other":[36],"components":[37],"due":[38],"to":[39,55,99,122],"their":[40],"intrinsic":[41],"association":[42],"the":[44,57,94,112],"need":[45],"for":[46,68],"robust":[47],"disorders":[49],"detection":[50],"systems.":[51],"This":[52],"paper":[53],"aims":[54],"address":[56,100],"disentanglement":[58],"of":[59,96],"prosody":[60,87],"by":[62],"leveraging":[63],"self-supervised":[64,131,145],"learning":[65],"(SSL)":[66],"model":[67,113,133],"depression":[69],"detection.":[70],"Specifically,":[71],"our":[72,107],"model,":[73],"DepAug,":[74],"captures":[75,115],"both":[76],"verbal":[77],"non-verbal":[79],"characteristics":[80,118],"through":[81],"a":[82,86,130],"encoder":[84],"encoder.":[88],"The":[89],"decoder":[90],"component":[91],"then":[92],"enables":[93],"generation":[95],"synthetic":[97],"samples":[98],"data":[101,136],"imbalance":[102],"issues.":[103],"Experimental":[104],"results":[105],"on":[106],"Bengali":[108],"dataset":[109],"show":[110],"that":[111,119,139],"accurately":[114],"general":[116],"can":[120],"adapt":[121],"diverse":[123],"emotional":[124],"contexts.":[126],"Additionally,":[127],"we":[128],"train":[129],"using":[134],"DepAug\u2019s":[135],"augmentation,":[137],"showing":[138],"it":[140],"outperforms":[141],"state-of-the-art":[142],"supervised":[143],"approaches.":[146]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
