{"id":"https://openalex.org/W2981767644","doi":"https://doi.org/10.1145/3343031.3351066","title":"Towards Automatic Face-to-Face Translation","display_name":"Towards Automatic Face-to-Face Translation","publication_year":2019,"publication_date":"2019-10-15","ids":{"openalex":"https://openalex.org/W2981767644","doi":"https://doi.org/10.1145/3343031.3351066","mag":"2981767644"},"language":"en","primary_location":{"id":"doi:10.1145/3343031.3351066","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3343031.3351066","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2003.00418","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Prajwal K R","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Prajwal K R","raw_affiliation_strings":["IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Rudrabha Mukhopadhyay","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rudrabha Mukhopadhyay","raw_affiliation_strings":["IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jerin Philip","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jerin Philip","raw_affiliation_strings":["IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Abhishek Jha","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Abhishek Jha","raw_affiliation_strings":["IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Vinay Namboodiri","orcid":null},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vinay Namboodiri","raw_affiliation_strings":["IIT Kanpur, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"IIT Kanpur, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"last","author":{"id":null,"display_name":"C V Jawahar","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"C V Jawahar","raw_affiliation_strings":["IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I65181880"],"apc_list":null,"apc_paid":null,"fwci":4.3573,"has_fulltext":false,"cited_by_count":125,"citation_normalized_percentile":{"value":0.95381612,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1428","last_page":"1436"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7702000141143799},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5928000211715698},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5654000043869019},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.557200014591217},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.38960000872612},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.3483999967575073}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8456000089645386},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7702000141143799},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5928000211715698},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5654000043869019},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.557200014591217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5536999702453613},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5250999927520752},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37400001287460327},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3156000077724457},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25369998812675476},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3343031.3351066","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3343031.3351066","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2003.00418","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.00418","pdf_url":"https://arxiv.org/pdf/2003.00418","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2003.00418","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.00418","pdf_url":"https://arxiv.org/pdf/2003.00418","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2115252128","https://openalex.org/W2120847449","https://openalex.org/W2147885303","https://openalex.org/W2193413348","https://openalex.org/W2550821151","https://openalex.org/W2738406145","https://openalex.org/W2767052532","https://openalex.org/W2887920589","https://openalex.org/W2963081548","https://openalex.org/W2964243274"],"related_works":[],"abstract_inverted_index":{"In":[0,60],"light":[1],"of":[2,44,121],"the":[3,116,124,158],"recent":[4],"breakthroughs":[5],"in":[6,48,75],"automatic":[7,66],"machine":[8],"translation":[9,85],"systems,":[10],"we":[11,17,29,63,80],"propose":[12],"a":[13,34,42,45,52,82,105],"novel":[14,106],"approach":[15],"that":[16,31,38,130,153],"term":[18],"as":[19],"\"Face-to-Face":[20,101],"Translation\".":[21],"As":[22],"today's":[23],"digital":[24],"communication":[25],"becomes":[26],"increasingly":[27],"visual,":[28],"argue":[30],"there":[32],"is":[33],"need":[35],"for":[36,68,110,162],"systems":[37],"can":[39,155],"automatically":[40],"translate":[41],"video":[43,175],"person":[46],"speaking":[47],"language":[49,54],"A":[50],"into":[51],"target":[53],"B":[55],"with":[56,166],"realistic":[57,112],"lip":[58],"synchronization.":[59],"this":[61,69],"work,":[62],"create":[64],"an":[65],"pipeline":[67],"problem":[70],"and":[71,95,151,164,173],"demonstrate":[72],"its":[73],"impact":[74],"multiple":[76,90,148],"real-world":[77],"applications.":[78],"First,":[79],"build":[81],"working":[83],"speech-to-speech":[84],"system":[86],"by":[87,103],"bringing":[88],"together":[89],"existing":[91,134],"modules":[92],"from":[93,115],"speech":[94],"language.":[96],"We":[97,140],"then":[98],"move":[99],"towards":[100],"Translation\"":[102],"incorporating":[104],"visual":[107],"module,":[108],"LipGAN":[109,122],"generating":[111],"talking":[113],"faces":[114],"translated":[117],"audio.":[118],"Quantitative":[119],"evaluation":[120],"on":[123],"standard":[125,138],"LRW":[126],"test":[127],"set":[128],"shows":[129],"it":[131,154],"significantly":[132,156],"outperforms":[133],"approaches":[135],"across":[136,169],"all":[137],"metrics.":[139],"also":[141],"subject":[142],"our":[143],"Face-to-Face":[144],"Translation":[145],"pipeline,":[146],"to":[147],"human":[149],"evaluations":[150],"show":[152],"improve":[157],"overall":[159],"user":[160],"experience":[161],"consuming":[163],"interacting":[165],"multimodal":[167],"content":[168],"languages.":[170],"Code,":[171],"models":[172],"demo":[174],"are":[176],"made":[177],"publicly":[178],"available.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":38},{"year":2023,"cited_by_count":36},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":4}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2019-11-01T00:00:00"}
