{"id":"https://openalex.org/W3206578098","doi":"https://doi.org/10.1145/3474085.3475220","title":"SimulLR: Simultaneous Lip Reading Transducer with Attention-Guided Adaptive Memory","display_name":"SimulLR: Simultaneous Lip Reading Transducer with Attention-Guided Adaptive Memory","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206578098","doi":"https://doi.org/10.1145/3474085.3475220","mag":"3206578098"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032950508","display_name":"Zhijie Lin","orcid":"https://orcid.org/0000-0003-3671-4032"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijie Lin","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079260216","display_name":"Zhou Zhao","orcid":"https://orcid.org/0000-0001-6121-0384"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100673440","display_name":"Haoyuan Li","orcid":"https://orcid.org/0009-0007-4678-9923"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyuan Li","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065126806","display_name":"Jinglin Liu","orcid":"https://orcid.org/0000-0002-9905-3887"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinglin Liu","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100437691","display_name":"Meng Zhang","orcid":"https://orcid.org/0000-0001-7064-8740"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Zhang","raw_affiliation_strings":["Huawei Noah's Ark Lab, ShenZhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, ShenZhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044933782","display_name":"Xingshan Zeng","orcid":"https://orcid.org/0000-0002-0455-5519"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingshan Zeng","raw_affiliation_strings":["Huawei Noah's Ark Lab, ShenZhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, ShenZhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102013345","display_name":"Xiaofei He","orcid":"https://orcid.org/0009-0001-9107-2354"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei He","raw_affiliation_strings":["Zhejiang University, HangZhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, HangZhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6986,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.85010377,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1359","last_page":"1367"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8556256890296936},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5877187252044678},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5303419828414917},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.4818759858608246},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4700911045074463},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4500376582145691},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.43906939029693604},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42407095432281494},{"id":"https://openalex.org/keywords/transducer","display_name":"Transducer","score":0.41027650237083435},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.34147483110427856},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1793341040611267}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8556256890296936},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5877187252044678},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5303419828414917},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.4818759858608246},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4700911045074463},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4500376582145691},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.43906939029693604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42407095432281494},{"id":"https://openalex.org/C56318395","wikidata":"https://www.wikidata.org/wiki/Q215928","display_name":"Transducer","level":2,"score":0.41027650237083435},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34147483110427856},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1793341040611267},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8799999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G8165535729","display_name":null,"funder_award_id":"61836002, 62072397","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8990919479","display_name":null,"funder_award_id":"2018AAA0100603","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2015143272","https://openalex.org/W2029199293","https://openalex.org/W2115252128","https://openalex.org/W2121457870","https://openalex.org/W2178654303","https://openalex.org/W2193413348","https://openalex.org/W2251955814","https://openalex.org/W2267805933","https://openalex.org/W2551572271","https://openalex.org/W2594690981","https://openalex.org/W2892611740","https://openalex.org/W2930957955","https://openalex.org/W2936123380","https://openalex.org/W2949354227","https://openalex.org/W2951015274","https://openalex.org/W2952746495","https://openalex.org/W2963030892","https://openalex.org/W2963403868","https://openalex.org/W2963414781","https://openalex.org/W2963729263","https://openalex.org/W2963785710","https://openalex.org/W2973122799","https://openalex.org/W2981501041","https://openalex.org/W2996970093","https://openalex.org/W3015315932","https://openalex.org/W3015927303","https://openalex.org/W3016011581","https://openalex.org/W3022631507","https://openalex.org/W3034586846","https://openalex.org/W3037856073","https://openalex.org/W3047497120","https://openalex.org/W3086926995","https://openalex.org/W3093337631","https://openalex.org/W3103801904"],"related_works":["https://openalex.org/W4384820447","https://openalex.org/W2012283803","https://openalex.org/W2072454424","https://openalex.org/W2117438306","https://openalex.org/W2185942010","https://openalex.org/W2260725127","https://openalex.org/W2004297762","https://openalex.org/W1992056405","https://openalex.org/W767846903","https://openalex.org/W2619803670"],"abstract_inverted_index":{"Lip":[0],"reading,":[1],"aiming":[2],"to":[3,8,26,58,128,156,178,197],"recognize":[4],"spoken":[5],"sentences":[6,105],"according":[7],"the":[9,18,52,59,68,92,99,103,130,133,158,179,206,217,220,226,237],"given":[10],"video":[11,163,187],"of":[12,70,94,102,132,168,201,239],"lip":[13,37,72,79,134],"movements":[14],"without":[15],"relying":[16],"on":[17],"audio":[19],"stream,":[20],"has":[21],"attracted":[22],"great":[23],"interest":[24],"due":[25,177],"its":[27],"application":[28],"in":[29,47,181],"many":[30],"scenarios.":[31],"Although":[32],"prior":[33],"works":[34],"that":[35,216],"explore":[36],"reading":[38,73,135],"have":[39],"obtained":[40],"salient":[41],"achievements,":[42],"they":[43],"are":[44,54],"all":[45],"trained":[46],"a":[48,77,111,148,162,192],"non-simultaneous":[49,228],"manner":[50],"where":[51],"predictions":[53],"generated":[55,104],"requiring":[56],"access":[57],"full":[60],"video.":[61],"To":[62,90,138],"breakthrough":[63],"this":[64],"constraint,":[65],"we":[66,109,146,190],"study":[67],"task":[69],"simultaneous":[71,78,107,144],"and":[74,114,125,152,204,230],"devise":[75,191],"SimulLR,":[76],"Reading":[80],"transducer":[81],"with":[82,209,225],"attention-guided":[83,194],"adaptive":[84,195],"memory":[85,196],"from":[86],"three":[87],"aspects:":[88],"(1)":[89],"address":[91],"challenge":[93],"monotonic":[95],"alignments":[96],"while":[97],"considering":[98],"syntactic":[100],"structure":[101],"under":[106],"setting,":[108],"build":[110],"transducer-based":[112],"model":[113,123],"design":[115],"several":[116],"effective":[117],"training":[118,131],"strategies":[119],"including":[120],"CTC":[121],"pre-training,":[122],"warm-up":[124],"curriculum":[126],"learning":[127],"promote":[129],"transducer.":[136],"(2)":[137],"learn":[139],"better":[140],"spatio-temporal":[141],"representations":[142,208],"for":[143,185],"encoder,":[145],"construct":[147],"truncated":[149],"3D":[150],"convolution":[151],"time-restricted":[153],"self-attention":[154],"layer":[155],"perform":[157],"frame-to-frame":[159],"interaction":[160],"within":[161],"segment":[164],"containing":[165],"fixed":[166],"number":[167],"frames.":[169],"(3)":[170],"The":[171,213],"history":[172,202],"information":[173,200],"is":[174],"always":[175],"limited":[176],"storage":[180],"real-time":[182],"scenarios,":[183],"especially":[184],"massive":[186],"data.":[188],"Therefore,":[189],"novel":[193],"organize":[198],"semantic":[199],"segments":[203],"enhance":[205],"visual":[207],"acceptable":[210],"computation-aware":[211],"latency.":[212],"experiments":[214],"show":[215],"SimulLR":[218],"achieves":[219],"translation":[221],"speedup":[222],"9.10x":[223],"compared":[224],"state-of-the-art":[227],"methods,":[229],"also":[231],"obtains":[232],"competitive":[233],"results,":[234],"which":[235],"indicates":[236],"effectiveness":[238],"our":[240],"proposed":[241],"methods.":[242]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
