{"id":"https://openalex.org/W3124524129","doi":"https://doi.org/10.1109/taslp.2021.3053387","title":"Synthesis and Analysis-By-Synthesis of Modulated Diplophonic Glottal Area Waveforms","display_name":"Synthesis and Analysis-By-Synthesis of Modulated Diplophonic Glottal Area Waveforms","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3124524129","doi":"https://doi.org/10.1109/taslp.2021.3053387","mag":"3124524129"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3053387","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3053387","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09330597.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09330597.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086225003","display_name":"Philipp Aichinger","orcid":"https://orcid.org/0000-0003-4353-4996"},"institutions":[{"id":"https://openalex.org/I76134821","display_name":"Medical University of Vienna","ror":"https://ror.org/05n3x4p02","country_code":"AT","type":"education","lineage":["https://openalex.org/I76134821"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Philipp Aichinger","raw_affiliation_strings":["Department of Otorhinolaryngology, Division of Phoniatrics-Logopedics, the Medical University of Vienna, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Department of Otorhinolaryngology, Division of Phoniatrics-Logopedics, the Medical University of Vienna, Vienna, Austria","institution_ids":["https://openalex.org/I76134821"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015798259","display_name":"Franz Pernkopf","orcid":"https://orcid.org/0000-0002-6356-3367"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Franz Pernkopf","raw_affiliation_strings":["Signal Processing and Speech Communication Laboratory (SPSC), the Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Signal Processing and Speech Communication Laboratory (SPSC), the Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5086225003"],"corresponding_institution_ids":["https://openalex.org/I76134821"],"apc_list":null,"apc_paid":null,"fwci":0.9518,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.79270023,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":97},"biblio":{"volume":"29","issue":null,"first_page":"914","last_page":"926"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.630606472492218},{"id":"https://openalex.org/keywords/pulse","display_name":"Pulse (music)","score":0.5373014807701111},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.49445295333862305},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.491030216217041},{"id":"https://openalex.org/keywords/amplitude","display_name":"Amplitude","score":0.47533416748046875},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45161134004592896},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4452383816242218},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4394189119338989},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.42269057035446167},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.4149150550365448},{"id":"https://openalex.org/keywords/vocal-folds","display_name":"Vocal folds","score":0.4143292307853699},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3755163550376892},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33515384793281555},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.31738001108169556},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.205389142036438},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.17085587978363037},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.1255776286125183}],"concepts":[{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.630606472492218},{"id":"https://openalex.org/C2780167933","wikidata":"https://www.wikidata.org/wiki/Q1550652","display_name":"Pulse (music)","level":3,"score":0.5373014807701111},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.49445295333862305},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.491030216217041},{"id":"https://openalex.org/C180205008","wikidata":"https://www.wikidata.org/wiki/Q159190","display_name":"Amplitude","level":2,"score":0.47533416748046875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45161134004592896},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4452383816242218},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4394189119338989},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.42269057035446167},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.4149150550365448},{"id":"https://openalex.org/C2780336059","wikidata":"https://www.wikidata.org/wiki/Q215558","display_name":"Vocal folds","level":3,"score":0.4143292307853699},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3755163550376892},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33515384793281555},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.31738001108169556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.205389142036438},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.17085587978363037},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.1255776286125183},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C2780474809","wikidata":"https://www.wikidata.org/wiki/Q9637","display_name":"Larynx","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3053387","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3053387","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09330597.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2021.3053387","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3053387","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09330597.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4225953306","display_name":null,"funder_award_id":"KLI 722-B30","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"}],"funders":[{"id":"https://openalex.org/F4320321181","display_name":"Austrian Science Fund","ror":"https://ror.org/013tf3c58"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3124524129.pdf","grobid_xml":"https://content.openalex.org/works/W3124524129.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W181056519","https://openalex.org/W398553055","https://openalex.org/W950991549","https://openalex.org/W1593286454","https://openalex.org/W1620414136","https://openalex.org/W1664163560","https://openalex.org/W1969863098","https://openalex.org/W1977956450","https://openalex.org/W1982473482","https://openalex.org/W1982976884","https://openalex.org/W1984352330","https://openalex.org/W1987313370","https://openalex.org/W1990285726","https://openalex.org/W2019974831","https://openalex.org/W2024677195","https://openalex.org/W2029030701","https://openalex.org/W2029460610","https://openalex.org/W2034628183","https://openalex.org/W2038799060","https://openalex.org/W2041352160","https://openalex.org/W2044686242","https://openalex.org/W2044803813","https://openalex.org/W2047006486","https://openalex.org/W2048235682","https://openalex.org/W2078039654","https://openalex.org/W2088339987","https://openalex.org/W2102518334","https://openalex.org/W2143535023","https://openalex.org/W2153004436","https://openalex.org/W2400743970","https://openalex.org/W2401864298","https://openalex.org/W2519091744","https://openalex.org/W2574055028","https://openalex.org/W2741728213","https://openalex.org/W2762646581","https://openalex.org/W2906467492","https://openalex.org/W2911728025","https://openalex.org/W2920145370","https://openalex.org/W2963300588","https://openalex.org/W2963589210","https://openalex.org/W2972521920","https://openalex.org/W2993150910","https://openalex.org/W3022454451","https://openalex.org/W3087129699","https://openalex.org/W4205974199","https://openalex.org/W4229763436","https://openalex.org/W6635618199","https://openalex.org/W6662193570","https://openalex.org/W6732028006","https://openalex.org/W6742167991","https://openalex.org/W6783818566"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W2159052453","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W2803255133","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866"],"abstract_inverted_index":{"Diplophonia":[0],"is":[1,35,51,62,105,155,180,192,258],"a":[2,195],"type":[3],"of":[4,39,76,91,128,168,207,226,252,269],"disordered":[5],"voice":[6,66],"in":[7,16,49],"which":[8,144,170],"two":[9,25,69,215,229],"simultaneous":[10],"pitches":[11],"are":[12,22,74,98,120,135,220,275],"perceived.":[13],"Most":[14],"commonly":[15],"diplophonic":[17,65,92,161],"voices,":[18],"the":[19,36,40,43,55,96,126,129,201,213,224,227,241,246],"vocal":[20,44,93],"folds":[21],"divided":[23],"into":[24],"parts":[26],"that":[27,73],"vibrate":[28],"at":[29],"different":[30,80],"frequencies.":[31],"The":[32,46,60],"glottal":[33,47,56],"area":[34,38,48,57],"projected":[37],"space":[41],"between":[42],"folds.":[45],"time":[50],"referred":[52],"to":[53,100,137,147,157,160,165,194,233,277],"as":[54,148,162,164],"waveform":[58],"(GAW).":[59],"GAW":[61],"modeled":[63],"for":[64,261,280],"by":[67,108,187,231],"superimposing":[68],"partial":[70],"GAWs":[71,134,139,264],"(pGAWs)":[72],"trains":[75,151],"single-peak":[77],"pulses":[78],"with":[79],"pulse":[81,150,272],"frequencies,":[82],"i.e.,":[83],"fundamental":[84],"frequencies":[85],"($f_o$s).":[86],"In":[87,205,250],"current":[88],"kinematic":[89],"models":[90],"fold":[94],"vibration,":[95],"pGAWs":[97],"assumed":[99],"be":[101,158,278],"quasiperiodic.":[102],"This":[103],"assumption":[104],"mitigated":[106],"here":[107],"modulating":[109],"pulse-to-pulse":[110],"cycle":[111],"length":[112],"and":[113,117,177,200],"amplitude.":[114],"Both":[115],"random":[116,174,189,218,239],"deterministic":[118],"modulations":[119,123,184,219],"considered.":[121],"Deterministic":[122],"depend":[124],"on":[125],"difference":[127],"pGAWs'":[130],"instantaneous":[131],"phases.":[132],"Model":[133],"fitted":[136],"input":[138,263],"using":[140],"an":[141],"analysis-by-synthesis":[142],"approach":[143],"we":[145],"refer":[146],"`modulated":[149],"decomposition'":[152],"(MPD).":[153],"MPD":[154,191,211,222],"shown":[156,276],"applicable":[159],"well":[163],"nondiplophonic":[166,267],"types":[167,268],"dysphonia,":[169],"include":[171],"multi-pulse":[172],"patterns,":[173],"timing":[175,273],"behaviours,":[176],"chaos.":[178],"It":[179],"mostly":[181],"robust":[182],"against":[183],"but":[185],"degraded":[186],"large":[188,238],"modulations.":[190],"compared":[193],"deep":[196,242],"autoencoder":[197,243],"neural":[198,203],"network,":[199],"WaveGlow":[202,257],"network.":[204],"terms":[206,251],"time-domain":[208],"fitting":[209,248,255],"errors,":[210,256],"outperforms":[212,223],"other":[214,228],"approaches":[216,230],"unless":[217],"large.":[221],"best":[225],"up":[232],"approximately":[234],"5":[235],"dB.":[236],"For":[237],"modulations,":[240],"network":[244],"achieves":[245],"smallest":[247],"errors.":[249],"magnitude":[253],"spectrum":[254],"superior":[259],"except":[260],"natural":[262],"containing":[265],"only":[266],"dysphonia.":[270],"Also":[271],"errors":[274],"advantageous":[279],"MPD.":[281]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
