{"id":"https://openalex.org/W4312433898","doi":"https://doi.org/10.1109/icpr56361.2022.9956600","title":"Learning Speaker-specific Lip-to-Speech Generation","display_name":"Learning Speaker-specific Lip-to-Speech Generation","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312433898","doi":"https://doi.org/10.1109/icpr56361.2022.9956600"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956600","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036559623","display_name":"Munender Varshney","orcid":"https://orcid.org/0000-0002-3061-5757"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Munender Varshney","raw_affiliation_strings":["Indian institute of Technology,Computer Science and Engineering department,Kanpur,India","Computer Science and Engineering department, Indian institute of Technology, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"Indian institute of Technology,Computer Science and Engineering department,Kanpur,India","institution_ids":["https://openalex.org/I94234084"]},{"raw_affiliation_string":"Computer Science and Engineering department, Indian institute of Technology, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010648323","display_name":"Ravindra Yadav","orcid":"https://orcid.org/0000-0003-4628-0688"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ravindra Yadav","raw_affiliation_strings":["Indian institute of Technology,Electrical department,Kanpur,India","Electrical department, Indian institute of Technology, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"Indian institute of Technology,Electrical department,Kanpur,India","institution_ids":["https://openalex.org/I94234084"]},{"raw_affiliation_string":"Electrical department, Indian institute of Technology, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007109424","display_name":"Vinay P. Namboodiri","orcid":"https://orcid.org/0000-0001-5262-9722"},"institutions":[{"id":"https://openalex.org/I51601045","display_name":"University of Bath","ror":"https://ror.org/002h8g185","country_code":"GB","type":"education","lineage":["https://openalex.org/I51601045"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vinay P. Namboodiri","raw_affiliation_strings":["University of Bath,UK","University of Bath, UK"],"affiliations":[{"raw_affiliation_string":"University of Bath,UK","institution_ids":["https://openalex.org/I51601045"]},{"raw_affiliation_string":"University of Bath, UK","institution_ids":["https://openalex.org/I51601045"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085503354","display_name":"Rajesh M. Hegde","orcid":"https://orcid.org/0000-0002-6142-7724"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajesh M Hegde","raw_affiliation_strings":["Indian institute of Technology,Electrical department,Kanpur,India","Electrical department, Indian institute of Technology, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"Indian institute of Technology,Electrical department,Kanpur,India","institution_ids":["https://openalex.org/I94234084"]},{"raw_affiliation_string":"Electrical department, Indian institute of Technology, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036559623"],"corresponding_institution_ids":["https://openalex.org/I94234084"],"apc_list":null,"apc_paid":null,"fwci":0.98,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.77946288,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"491","last_page":"498"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8281964659690857},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6963880062103271},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5548007488250732},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5405233502388},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5245440602302551},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4555656909942627},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.432483047246933},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3652811050415039},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2023329734802246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8281964659690857},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6963880062103271},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5548007488250732},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5405233502388},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5245440602302551},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4555656909942627},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.432483047246933},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3652811050415039},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2023329734802246},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956600","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8500000238418579,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G487895436","display_name":null,"funder_award_id":"EP/T022523/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5212373428","display_name":null,"funder_award_id":"EP/M023281/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W1527102103","https://openalex.org/W1552314771","https://openalex.org/W1686810756","https://openalex.org/W1810943226","https://openalex.org/W1959608418","https://openalex.org/W2015143272","https://openalex.org/W2050708608","https://openalex.org/W2060510034","https://openalex.org/W2067295501","https://openalex.org/W2085052862","https://openalex.org/W2093378872","https://openalex.org/W2097117768","https://openalex.org/W2127141656","https://openalex.org/W2156017586","https://openalex.org/W2163605009","https://openalex.org/W2243738093","https://openalex.org/W2516001803","https://openalex.org/W2551572271","https://openalex.org/W2556171197","https://openalex.org/W2585824449","https://openalex.org/W2890952074","https://openalex.org/W2891205112","https://openalex.org/W2896457183","https://openalex.org/W2897318954","https://openalex.org/W2916877561","https://openalex.org/W2963019222","https://openalex.org/W2963528589","https://openalex.org/W2963936489","https://openalex.org/W2964110616","https://openalex.org/W2964171275","https://openalex.org/W2964283370","https://openalex.org/W2964352155","https://openalex.org/W2965373594","https://openalex.org/W2970641574","https://openalex.org/W2972563022","https://openalex.org/W2972775954","https://openalex.org/W2981501041","https://openalex.org/W2981905048","https://openalex.org/W2987787977","https://openalex.org/W2996428491","https://openalex.org/W3009270141","https://openalex.org/W3010048580","https://openalex.org/W3013310839","https://openalex.org/W3014611590","https://openalex.org/W3015625764","https://openalex.org/W3015830103","https://openalex.org/W3025520547","https://openalex.org/W3034673405","https://openalex.org/W3034999214","https://openalex.org/W3035626590","https://openalex.org/W3043840704","https://openalex.org/W3096316132","https://openalex.org/W3096650361","https://openalex.org/W3097777922","https://openalex.org/W3118548710","https://openalex.org/W3119469378","https://openalex.org/W3130008318","https://openalex.org/W3153832461","https://openalex.org/W3160305627","https://openalex.org/W3174856432","https://openalex.org/W3213191779","https://openalex.org/W4224916651","https://openalex.org/W4298112588","https://openalex.org/W4385245566","https://openalex.org/W6637373629","https://openalex.org/W6638273328","https://openalex.org/W6640963894","https://openalex.org/W6684191040","https://openalex.org/W6732872814","https://openalex.org/W6734491695","https://openalex.org/W6739901393","https://openalex.org/W6754392867","https://openalex.org/W6754420807","https://openalex.org/W6766673545","https://openalex.org/W6768021236","https://openalex.org/W6774882322","https://openalex.org/W6775188310","https://openalex.org/W6787924464","https://openalex.org/W6788231789"],"related_works":["https://openalex.org/W2167155152","https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W1521297879","https://openalex.org/W2036150633","https://openalex.org/W3184123547","https://openalex.org/W1909151225","https://openalex.org/W1987783679","https://openalex.org/W2160030256"],"abstract_inverted_index":{"Understanding":[0],"the":[1,6,14,28,60,65,81,90,98,120,137,150,212],"lip":[2,68,129,166],"movement":[3,69,167],"and":[4,30,42,52,64,76,96,108,152,178,209],"inferring":[5],"speech":[7,53,63,124,139,162],"from":[8,24,49,89,165],"it":[9],"is":[10],"notoriously":[11],"difficult":[12],"for":[13],"common":[15],"person.":[16],"The":[17,131],"task":[18],"of":[19,27,67,70,87,105],"accurate":[20],"lip-reading":[21],"gets":[22],"help":[23],"various":[25,176],"cues":[26],"speaker":[29,37,141,160],"its":[31],"contextual":[32],"or":[33],"environmental":[34],"setting.":[35,172],"Every":[36],"has":[38],"a":[39,85,201],"different":[40],"accent":[41],"speaking":[43,142],"style,":[44],"which":[45,118],"can":[46],"be":[47],"inferred":[48],"their":[50],"visual":[51],"features.":[54],"This":[55],"work":[56],"aims":[57],"to":[58,122,157],"understand":[59],"correlation/mapping":[61],"between":[62],"sequence":[66,83],"individual":[71],"speakers":[72],"in":[73,92,125,140,168,196],"an":[74,93,169,197],"unconstrained":[75,170,198],"large":[77],"vocabulary.":[78],"We":[79,110,144],"model":[80,148],"frame":[82],"as":[84],"distribution":[86],"features":[88],"transformer":[91],"autoencoder":[94],"setting":[95],"learn":[97,111],"embeddings":[99],"jointly":[100],"that":[101,186],"exploits":[102],"temporal":[103,112],"properties":[104],"both":[106],"audio":[107],"video.":[109],"synchronization":[113],"using":[114,175],"deep":[115],"metric":[116],"learning,":[117],"guides":[119],"decoder":[121],"generate":[123],"sync":[126],"with":[127,181],"input":[128],"movements.":[130],"predictive":[132],"posterior":[133],"thus":[134],"gives":[135],"us":[136],"generated":[138],"style.":[143],"have":[145],"trained":[146],"our":[147,187],"on":[149,190,214],"Grid":[151],"Lip2Wav":[153,191],"Chemistry":[154,192],"lecture":[155],"dataset":[156,193],"evaluate":[158],"single":[159],"natural":[161,171],"generation":[163],"tasks":[164],"Extensive":[173],"evaluation":[174,183,207],"qualitative":[177],"quantitative":[179],"metrics":[180,208],"human":[182],"also":[184],"shows":[185],"method":[188],"outperforms":[189,211],"(large":[194],"vocabulary":[195],"setting)":[199],"by":[200],"good":[202],"margin":[203],"across":[204],"almost":[205],"all":[206],"marginally":[210],"state-of-the-art":[213],"GRID":[215],"dataset.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-27T16:54:17.756197","created_date":"2025-10-10T00:00:00"}
