{"id":"https://openalex.org/W3082218567","doi":"https://doi.org/10.1109/spcom50965.2020.9179511","title":"Intelligibility Improvement of Dysarthric Speech using MMSE DiscoGAN","display_name":"Intelligibility Improvement of Dysarthric Speech using MMSE DiscoGAN","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3082218567","doi":"https://doi.org/10.1109/spcom50965.2020.9179511","mag":"3082218567"},"language":"en","primary_location":{"id":"doi:10.1109/spcom50965.2020.9179511","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spcom50965.2020.9179511","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Signal Processing and Communications (SPCOM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091764885","display_name":"Mirali Purohit","orcid":"https://orcid.org/0009-0009-0354-4920"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Mirali Purohit","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018555559","display_name":"Maitreya Patel","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Maitreya Patel","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037317140","display_name":"Harshit Malaviya","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Harshit Malaviya","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079352542","display_name":"Ankur T. Patil","orcid":"https://orcid.org/0000-0002-5666-272X"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ankur Patil","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063582658","display_name":"Mihir Parmar","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mihir Parmar","raw_affiliation_strings":["Arizona State University, Tempe, USA"],"affiliations":[{"raw_affiliation_string":"Arizona State University, Tempe, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091143209","display_name":"Nirmesh J. Shah","orcid":"https://orcid.org/0000-0002-7294-6757"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Nirmesh Shah","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062542902","display_name":"Savan Doshi","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Savan Doshi","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043002276","display_name":"Hemant A. Patil","orcid":"https://orcid.org/0000-0002-4068-2005"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Hemant A. Patil","raw_affiliation_strings":["Speech Research Lab, DA-IICT, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5091764885"],"corresponding_institution_ids":["https://openalex.org/I98389781"],"apc_list":null,"apc_paid":null,"fwci":1.193,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.83649108,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.8445101976394653},{"id":"https://openalex.org/keywords/dysarthria","display_name":"Dysarthria","score":0.7901115417480469},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7844699621200562},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6853880882263184},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4538671672344208},{"id":"https://openalex.org/keywords/speech-production","display_name":"Speech production","score":0.44895070791244507},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.44413459300994873},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32666105031967163},{"id":"https://openalex.org/keywords/audiology","display_name":"Audiology","score":0.12953898310661316}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.8445101976394653},{"id":"https://openalex.org/C2777639682","wikidata":"https://www.wikidata.org/wiki/Q225957","display_name":"Dysarthria","level":2,"score":0.7901115417480469},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7844699621200562},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6853880882263184},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4538671672344208},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.44895070791244507},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.44413459300994873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32666105031967163},{"id":"https://openalex.org/C548259974","wikidata":"https://www.wikidata.org/wiki/Q569965","display_name":"Audiology","level":1,"score":0.12953898310661316},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/spcom50965.2020.9179511","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spcom50965.2020.9179511","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Signal Processing and Communications (SPCOM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W5057334","https://openalex.org/W180052447","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W2011378162","https://openalex.org/W2035424729","https://openalex.org/W2042402023","https://openalex.org/W2046033161","https://openalex.org/W2077709676","https://openalex.org/W2077804127","https://openalex.org/W2106359072","https://openalex.org/W2124641009","https://openalex.org/W2145130307","https://openalex.org/W2295406225","https://openalex.org/W2399401597","https://openalex.org/W2476324168","https://openalex.org/W2545886659","https://openalex.org/W2562848127","https://openalex.org/W2576309025","https://openalex.org/W2598581049","https://openalex.org/W2608185040","https://openalex.org/W2739735615","https://openalex.org/W2740399610","https://openalex.org/W2765811365","https://openalex.org/W2899748887","https://openalex.org/W2951939904","https://openalex.org/W2963971656","https://openalex.org/W2964121744","https://openalex.org/W2972689158","https://openalex.org/W2987496713","https://openalex.org/W2997912476","https://openalex.org/W3012437242","https://openalex.org/W3015465870","https://openalex.org/W6607307618","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6676118236","https://openalex.org/W6697307144","https://openalex.org/W6729138736","https://openalex.org/W6735204497","https://openalex.org/W6755932049"],"related_works":["https://openalex.org/W2084983866","https://openalex.org/W3012459429","https://openalex.org/W3015238377","https://openalex.org/W2027206961","https://openalex.org/W2318301054","https://openalex.org/W2754882168","https://openalex.org/W2091380405","https://openalex.org/W2299746430","https://openalex.org/W4312547701","https://openalex.org/W2890040947"],"abstract_inverted_index":{"Dysarthria":[0],"is":[1,43,50,59,124,154],"a":[2,60,119,149],"manifestation":[3],"of":[4,40,53,89,164,205],"the":[5,38,129,132,136,143,166,169,179,203,215],"disordering":[6],"in":[7,18,26,71],"articulatory":[8],"parts":[9],"that":[10,63,183],"are":[11,69],"used":[12,125],"during":[13],"speech":[14,23,25,42,65,68,72,115,212],"production,":[15],"which":[16],"results":[17],"uneven,":[19],"slow,":[20],"slurred,":[21],"monotone":[22],"or":[24],"an":[27],"abnormal":[28],"rhythm.":[29],"People":[30],"with":[31,104,156],"dysarthria":[32],"produce":[33],"less":[34,51],"intelligible":[35],"speech.":[36,57],"Improving":[37],"intelligibility":[39,204],"dysarthric":[41,56,64,206],"challenging":[44],"because":[45],"unlike":[46],"normal":[47,67],"speech,":[48,207],"there":[49],"amount":[52],"data":[54],"for":[55,113,148,192],"It":[58],"known":[61],"fact":[62],"and":[66,168,190,194,208],"different":[70],"production-perception":[73],"perspectives.":[74],"Recently,":[75],"Generative":[76],"Adversarial":[77],"Network":[78,160],"(GAN)-based":[79],"architectures":[80,167],"have":[81],"become":[82],"more":[83,210],"popular":[84],"to":[85,98,126,141,214],"learn":[86],"such":[87],"kind":[88],"cross-domain":[90],"relationships":[91],"efficiently.":[92],"In":[93,117,131],"this":[94],"paper,":[95],"we":[96,134,181],"propose":[97],"use":[99,135],"Discover":[100],"GAN":[101],"(DiscoGAN)":[102],"along":[103],"Mean":[105],"Square":[106],"Error":[107,145],"(MSE)":[108],"regularization":[109],"(i.e.,":[110],"MMSE":[111,184],"DiscoGAN)":[112],"Dysarthric-to-Normal":[114],"conversion.":[116],"particular,":[118],"direct":[120],"feature-based":[121],"mapping":[122],"technique":[123],"train":[127],"all":[128],"models.":[130,217],"end,":[133],"Automatic":[137],"Speech":[138],"Recognition":[139],"(ASR)":[140],"measure":[142],"Phoneme":[144],"Rate":[146],"(PER)":[147],"particular":[150],"speaker.":[151],"Proposed":[152],"method":[153],"compared":[155,213],"baseline":[157],"Deep":[158],"Neural":[159],"(DNN)-based":[161],"system.":[162],"Training":[163],"both":[165],"evaluations":[170],"were":[171],"carried":[172],"out":[173],"on":[174],"UA":[175],"corpus.":[176],"By":[177],"analyzing":[178],"results,":[180],"observed":[182],"DiscoGAN":[185],"outperforms":[186],"DNN":[187],"by":[188],"13.16%":[189],"9.64%":[191],"male":[193],"female,":[195],"respectively.":[196],"Moreover,":[197],"proposed":[198],"GAN-based":[199],"frameworks":[200],"efficiently":[201],"improve":[202],"generate":[209],"naturalsounding":[211],"DNN-based":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
