{"id":"https://openalex.org/W4416251422","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229167","title":"PromptLTS: Text-Guided Controllable Speech Generation from Lip Movements","display_name":"PromptLTS: Text-Guided Controllable Speech Generation from Lip Movements","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251422","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229167"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11229167","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010622981","display_name":"Wenlong Xu","orcid":"https://orcid.org/0000-0001-5505-4776"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenlong Xu","raw_affiliation_strings":["Zhejiang University,School of Software Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Software Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009366733","display_name":"Xuelin Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuelin Cheng","raw_affiliation_strings":["Zhejiang University,School of Software Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Software Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059174471","display_name":"Yanliang Tan","orcid":"https://orcid.org/0000-0002-3173-0968"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanliang Tan","raw_affiliation_strings":["Zhejiang University,School of Software Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Software Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450363","display_name":"Shuo Zhang","orcid":"https://orcid.org/0000-0002-2284-7427"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Zhang","raw_affiliation_strings":["Zhejiang University,School of Software Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Software Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102224377","display_name":"Junlin An","orcid":"https://orcid.org/0009-0008-7830-2830"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junlin An","raw_affiliation_strings":["Zhejiang University,School of Software Technology,Ningbo,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,School of Software Technology,Ningbo,China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010622981"],"corresponding_institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.42139592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9246000051498413,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9246000051498413,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.03139999881386757,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.005100000184029341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.6606000065803528},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5533999800682068},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.46399998664855957},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4620000123977661},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.45419999957084656},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4307999908924103},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.39719998836517334}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7519000172615051},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7335000038146973},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.6606000065803528},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5533999800682068},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4620000123977661},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.45419999957084656},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4307999908924103},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.39719998836517334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3855000138282776},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3806000053882599},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.351500004529953},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.26660001277923584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11229167","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322927","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W160640889","https://openalex.org/W2015143272","https://openalex.org/W2194775991","https://openalex.org/W2585824449","https://openalex.org/W2962788625","https://openalex.org/W2963019222","https://openalex.org/W2964243274","https://openalex.org/W2964352155","https://openalex.org/W2972563022","https://openalex.org/W3035626590","https://openalex.org/W3094650042","https://openalex.org/W3157840621","https://openalex.org/W3160305627","https://openalex.org/W3197541421","https://openalex.org/W3197704090","https://openalex.org/W3198123658","https://openalex.org/W4283809657","https://openalex.org/W4296069328","https://openalex.org/W4313065316","https://openalex.org/W4375868850","https://openalex.org/W4375869257","https://openalex.org/W4385822534","https://openalex.org/W4385822787","https://openalex.org/W4386057728","https://openalex.org/W4392908903","https://openalex.org/W4402111427","https://openalex.org/W4402671873"],"related_works":[],"abstract_inverted_index":{"Controllable":[0],"text-to-speech":[1],"has":[2],"drawn":[3],"wide":[4],"attention":[5],"recently.":[6],"However,":[7,80],"there":[8,129],"is":[9,42,61,74,130],"currently":[10],"no":[11,131],"research":[12],"specifically":[13],"focused":[14],"on":[15],"controllable":[16],"lip-to-speech.":[17],"In":[18],"this":[19,97,102],"work,":[20],"we":[21,104,142],"explore":[22],"the":[23,31,77,90,111,124,138,159],"possibility":[24],"of":[25,33,45,89,123,140],"utilizing":[26],"text":[27],"descriptions":[28],"to":[29,109,136,151],"guide":[30],"synthesis":[32],"speech":[34,55,92,169,176],"through":[35],"lip":[36,51,161],"movements,":[37],"known":[38],"as":[39,84],"PromptLTS.":[40],"PromptLTS":[41,166],"mainly":[43],"composed":[44],"a":[46,50,54,68,106,144,153],"style":[47,156,172],"prompt":[48],"encoder,":[49],"encoder":[52],"and":[53,63,87,114,158,174],"decoder.":[56],"Since":[57],"each":[58],"person\u2019s":[59],"timbre":[60,113],"unique":[62],"cannot":[64],"be":[65,94],"described":[66],"by":[67,96,120],"prompt,":[69],"an":[70],"additional":[71],"reference":[72,98,125],"audio":[73],"added":[75],"during":[76],"input":[78],"stage.":[79],"other":[81,121],"factors":[82,122],"(such":[83],"pitch,":[85],"speed,":[86],"energy)":[88],"generated":[91],"may":[93],"influenced":[95],"audio.":[99,126],"To":[100],"address":[101],"problem,":[103],"adopt":[105],"disentangling":[107],"module":[108],"extract":[110],"speaker\u2019s":[112],"prevent":[115],"it":[116],"from":[117],"being":[118],"affected":[119],"Given":[127],"that":[128,146,165],"LTS":[132],"dataset":[133,154],"with":[134,170],"prompts,":[135],"benchmark":[137],"task":[139],"PromptLTS,":[141],"provide":[143],"pipeline":[145],"utilizes":[147],"large":[148],"language":[149],"models":[150],"construct":[152],"including":[155],"prompts":[157],"corresponding":[160],"movements.":[162],"Experiments":[163],"show":[164],"can":[167],"generate":[168],"precise":[171],"control":[173],"high":[175],"quality.":[177]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
