{"id":"https://openalex.org/W3015301228","doi":"https://doi.org/10.1109/icassp40776.2020.9053299","title":"Cogans For Unsupervised Visual Speech Adaptation To New Speakers","display_name":"Cogans For Unsupervised Visual Speech Adaptation To New Speakers","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015301228","doi":"https://doi.org/10.1109/icassp40776.2020.9053299","mag":"3015301228"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053299","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053299","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079540599","display_name":"Adriana Fernandez-Lopez","orcid":"https://orcid.org/0000-0001-8839-6537"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Adriana Fernandez-Lopez","raw_affiliation_strings":["Department of Information and Communication Technologies, Pompeu Fabra University, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Information and Communication Technologies, Pompeu Fabra University, Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020509870","display_name":"Ali Karaali","orcid":"https://orcid.org/0000-0002-4154-4181"},"institutions":[{"id":"https://openalex.org/I205274468","display_name":"Trinity College Dublin","ror":"https://ror.org/02tyrky19","country_code":"IE","type":"education","lineage":["https://openalex.org/I205274468"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Ali Karaali","raw_affiliation_strings":["Sigmedia, ADAPT Centre, School of Engineering, Trinity College, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"Sigmedia, ADAPT Centre, School of Engineering, Trinity College, Dublin, Ireland","institution_ids":["https://openalex.org/I205274468"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042231269","display_name":"Naomi Harte","orcid":"https://orcid.org/0000-0002-9274-209X"},"institutions":[{"id":"https://openalex.org/I205274468","display_name":"Trinity College Dublin","ror":"https://ror.org/02tyrky19","country_code":"IE","type":"education","lineage":["https://openalex.org/I205274468"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Naomi Harte","raw_affiliation_strings":["Sigmedia, ADAPT Centre, School of Engineering, Trinity College, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"Sigmedia, ADAPT Centre, School of Engineering, Trinity College, Dublin, Ireland","institution_ids":["https://openalex.org/I205274468"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019420087","display_name":"Federico M. Sukno","orcid":"https://orcid.org/0000-0002-2029-1576"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Federico M. Sukno","raw_affiliation_strings":["Department of Information and Communication Technologies, Pompeu Fabra University, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Information and Communication Technologies, Pompeu Fabra University, Spain","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079540599"],"corresponding_institution_ids":["https://openalex.org/I170486558"],"apc_list":null,"apc_paid":null,"fwci":0.7577,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.6959842,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"62","issue":null,"first_page":"6294","last_page":"6298"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7558820247650146},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7393655776977539},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.6326220035552979},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5096609592437744},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47076988220214844},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.467433363199234},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.41794806718826294},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4109511375427246},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.30732473731040955},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.25959038734436035},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0745840072631836}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7558820247650146},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7393655776977539},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.6326220035552979},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5096609592437744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47076988220214844},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.467433363199234},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.41794806718826294},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4109511375427246},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.30732473731040955},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.25959038734436035},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0745840072631836},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053299","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053299","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1531976713","https://openalex.org/W1980389196","https://openalex.org/W1982656606","https://openalex.org/W2029199293","https://openalex.org/W2267805933","https://openalex.org/W2306241824","https://openalex.org/W2398406965","https://openalex.org/W2403559635","https://openalex.org/W2551572271","https://openalex.org/W2579335913","https://openalex.org/W2594690981","https://openalex.org/W2597418160","https://openalex.org/W2741151796","https://openalex.org/W2886945201","https://openalex.org/W2889624961","https://openalex.org/W2890952074","https://openalex.org/W2898336590","https://openalex.org/W2952746495","https://openalex.org/W2963024054","https://openalex.org/W2963356069","https://openalex.org/W2963370694","https://openalex.org/W2963744813","https://openalex.org/W2963784072","https://openalex.org/W2972799770","https://openalex.org/W6631751486","https://openalex.org/W6698261589","https://openalex.org/W6713298310","https://openalex.org/W6720691552","https://openalex.org/W6732514500","https://openalex.org/W6734491695","https://openalex.org/W6754079011","https://openalex.org/W6754392867"],"related_works":["https://openalex.org/W80423236","https://openalex.org/W3164669818","https://openalex.org/W3134920593","https://openalex.org/W2143247386","https://openalex.org/W1990589093","https://openalex.org/W2501000458","https://openalex.org/W1578749070","https://openalex.org/W2146842779","https://openalex.org/W2340308015","https://openalex.org/W2083296885"],"abstract_inverted_index":{"Audio-Visual":[0],"Speech":[1],"Recognition":[2],"(AVSR)":[3],"faces":[4],"the":[5,18,33,46,59,65,69,100,105,109,113,138],"difficult":[6],"task":[7],"of":[8,35,48,72,112,128],"exploiting":[9,58],"acoustic":[10],"and":[11,79,141],"visual":[12,19,36,60,70],"cues":[13],"simultaneously.":[14],"Augmenting":[15],"speech":[16],"with":[17,149],"channel":[20],"creates":[21],"its":[22,152],"own":[23],"challenges,":[24],"e.g.":[25],"every":[26],"person":[27],"has":[28],"unique":[29],"mouth":[30],"movements,":[31],"making":[32],"generalization":[34,47],"models":[37],"very":[38],"difficult.":[39],"This":[40],"factor":[41],"motivates":[42],"our":[43,133],"focus":[44],"on":[45,137],"speaker-independent":[49],"(SI)":[50],"AVSR":[51,85,135,153],"systems":[52],"especially":[53],"in":[54,88,95,104],"noisy":[55],"environments":[56],"by":[57],"domain.":[61,107],"Specifically,":[62],"we":[63,116],"are":[64],"first":[66],"to":[67,76,92,122,144,151],"explore":[68],"adaptation":[71,111],"an":[73,77,84],"SI-AVSR":[74],"system":[75,86,136,154],"unknown":[78,114],"unlabelled":[80],"speaker.":[81],"We":[82,131],"adapt":[83],"trained":[87],"a":[89,96,125,145],"source":[90],"domain":[91,98,110],"decode":[93],"samples":[94],"target":[97,106],"without":[99],"need":[101],"for":[102],"labels":[103],"For":[108],"speaker,":[115],"use":[117],"Coupled":[118],"Generative":[119],"Adversarial":[120],"Networks":[121],"automatically":[123],"learn":[124],"joint":[126],"distribution":[127],"multi-domain":[129],"images.":[130],"evaluate":[132],"character-based":[134],"TCD-TIMIT":[139],"dataset":[140],"obtain":[142],"up":[143],"10%":[146],"average":[147],"improvement":[148],"respect":[150],"equivalent.":[155]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
