{"id":"https://openalex.org/W4415707829","doi":"https://doi.org/10.1109/icme59968.2025.11209065","title":"PGD-N2L: A Parameter-Guided Disentanglement Approach for Normal-To-Lombard Speech Conversion","display_name":"PGD-N2L: A Parameter-Guided Disentanglement Approach for Normal-To-Lombard Speech Conversion","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707829","doi":"https://doi.org/10.1109/icme59968.2025.11209065"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008473103","display_name":"Hongyang Chen","orcid":"https://orcid.org/0000-0002-7626-0162"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyang Chen","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102859655","display_name":"Yuhong Yang","orcid":"https://orcid.org/0000-0003-3001-7957"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhong Yang","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050125868","display_name":"Xinmeng Xu","orcid":"https://orcid.org/0009-0008-1607-7180"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinmeng Xu","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100435044","display_name":"Xingyu Liu","orcid":"https://orcid.org/0000-0001-9736-3948"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingyu Liu","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041664453","display_name":"Weiping Tu","orcid":"https://orcid.org/0000-0002-6933-3298"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Tu","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100741750","display_name":"Zhongyuan Wang","orcid":"https://orcid.org/0000-0002-9796-488X"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongyuan Wang","raw_affiliation_strings":["Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,NERCMS, School of Computer Science, Hubei Luojia Laboratory,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113361554","display_name":"Cedar Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cedar Lin","raw_affiliation_strings":["Guangdong OPPO Mobile Telecommunications Corp.,China"],"affiliations":[{"raw_affiliation_string":"Guangdong OPPO Mobile Telecommunications Corp.,China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101572558","display_name":"Xin Zhao","orcid":"https://orcid.org/0000-0003-3610-1519"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Zhao","raw_affiliation_strings":["Guangdong OPPO Mobile Telecommunications Corp.,China"],"affiliations":[{"raw_affiliation_string":"Guangdong OPPO Mobile Telecommunications Corp.,China","institution_ids":["https://openalex.org/I180662265"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5008473103"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16418761,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7710000276565552,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7710000276565552,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.15219999849796295,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.014000000432133675,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7856000065803528},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6258000135421753},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3962000012397766},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.35499998927116394},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.3336000144481659},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.3149999976158142},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.31119999289512634}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7856000065803528},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7447999715805054},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7272999882698059},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6258000135421753},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.35499998927116394},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.3109000027179718},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2906000018119812},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C131109320","wikidata":"https://www.wikidata.org/wiki/Q581012","display_name":"Linear prediction","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C2989496772","wikidata":"https://www.wikidata.org/wiki/Q52946","display_name":"Speech communication","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1974387177","https://openalex.org/W1978814037","https://openalex.org/W2010110013","https://openalex.org/W2085662862","https://openalex.org/W2107860279","https://openalex.org/W2129741985","https://openalex.org/W2471520273","https://openalex.org/W2750452568","https://openalex.org/W2763767706","https://openalex.org/W2808631503","https://openalex.org/W2810311710","https://openalex.org/W2911412811","https://openalex.org/W2936173958","https://openalex.org/W2962788625","https://openalex.org/W2981087920","https://openalex.org/W2983423850","https://openalex.org/W3024869864","https://openalex.org/W3034443407","https://openalex.org/W3087267027","https://openalex.org/W3148935696","https://openalex.org/W3209984917","https://openalex.org/W4225892118","https://openalex.org/W4372260053","https://openalex.org/W4386160241","https://openalex.org/W4392903821","https://openalex.org/W4402980879"],"related_works":[],"abstract_inverted_index":{"The":[0],"Normal-To-Lombard":[1],"(N2L)":[2],"speech":[3,8,39,56,61,153,158,174,186],"conversion":[4,57],"can":[5],"effectively":[6,172],"improve":[7,173],"intelligibility":[9,175],"in":[10,152],"noisy":[11],"communication":[12],"scenarios":[13],"and":[14,67,86,100,125,157,168,176],"serve":[15],"as":[16],"a":[17,50,77,91,96,101,107],"data":[18],"augmentation":[19],"tool":[20],"for":[21,54],"various":[22],"speech-related":[23],"algorithms.":[24],"However,":[25],"existing":[26,149],"N2L":[27,55,150],"methods":[28],"did":[29],"not":[30],"aim":[31],"to":[32,42,105,118,187],"disentangle":[33],"the":[34,121,129,164,169,180,184,191],"Lombard":[35,68,87,103,123,139,193],"effect":[36,124],"from":[37],"other":[38],"attributes,":[40],"leading":[41],"incomplete":[43],"conversions.":[44],"In":[45],"this":[46],"paper,":[47],"we":[48,75,89],"propose":[49,76,90],"Parameter-Guided":[51],"Disentanglement":[52],"approach":[53],"(PGD-N2L)":[58],"which":[59],"decomposes":[60],"into":[62,128],"linguistic":[63,73,130],"content,":[64,74],"speaker":[65,84,98,126,166],"identity,":[66],"effect.":[69],"To":[70,81],"extract":[71,82],"disentangled":[72,83],"DeLomb-Based":[78],"content":[79,131],"encoder.":[80],"identity":[85,127],"effect,":[88],"style":[92,109,135],"encoder":[93,99,104,167],"that":[94,144,163],"combines":[95],"fine-tuned":[97,165],"learnable":[102],"form":[106],"personalized":[108,134],"embedding.":[110],"Furthermore,":[111],"an":[112],"En-Lomb-Based":[113],"injection":[114],"module":[115],"is":[116],"designed":[117],"accurately":[119],"integrate":[120],"target":[122,192],"based":[132],"on":[133],"embedding,":[136],"ensuring":[137],"complete":[138],"conversion.":[140],"Experimental":[141],"results":[142],"demonstrate":[143],"our":[145],"proposed":[146],"method":[147],"outperforms":[148],"models":[151],"intelligibility,":[154],"acoustic":[155,177],"similarity,":[156,178],"quality.":[159],"Ablation":[160],"studies":[161],"confirm":[162],"De-Lomb":[170],"block":[171,182],"while":[179],"En-Lomb":[181],"enables":[183],"converted":[185],"more":[188],"closely":[189],"match":[190],"speech.":[194]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-30T00:00:00"}
