{"id":"https://openalex.org/W7125836432","doi":"https://doi.org/10.48550/arxiv.2601.18177","title":"Lip-Siri: Contactless Open-Sentence Silent Speech with Wi-Fi Backscatter","display_name":"Lip-Siri: Contactless Open-Sentence Silent Speech with Wi-Fi Backscatter","publication_year":2026,"publication_date":"2026-01-26","ids":{"openalex":"https://openalex.org/W7125836432","doi":"https://doi.org/10.48550/arxiv.2601.18177"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.18177","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.18177","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.18177","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123963476","display_name":"Ye Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tian, Ye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124051027","display_name":"Haohua Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Haohua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124049504","display_name":"Chao Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123996042","display_name":"Junyang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Junyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123957558","display_name":"Shanyue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shanyue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124018722","display_name":"Hao Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123969534","display_name":"Jiahui Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Jiahui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123938098","display_name":"Xiang-Yang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiang-Yang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5123963476"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.004100000020116568,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6843000054359436},{"id":"https://openalex.org/keywords/backscatter","display_name":"Backscatter (email)","score":0.5494999885559082},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5178999900817871},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5105000138282776},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.3986999988555908},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.32690000534057617},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.3212999999523163}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7253999710083008},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6843000054359436},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6137999892234802},{"id":"https://openalex.org/C30354325","wikidata":"https://www.wikidata.org/wiki/Q204667","display_name":"Backscatter (email)","level":3,"score":0.5494999885559082},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5178999900817871},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5105000138282776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40139999985694885},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.3986999988555908},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3659999966621399},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.32019999623298645},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.3001999855041504},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.2721000015735626},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.2685999870300293}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.18177","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.18177","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.18177","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.18177","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8464886546134949}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Silent":[0],"speech":[1],"interfaces":[2],"(SSIs)":[3],"enable":[4],"silent":[5],"interaction":[6],"in":[7],"noise-sensitive":[8],"or":[9,39],"privacy-sensitive":[10],"settings.":[11],"However,":[12],"existing":[13],"SSIs":[14],"face":[15],"practical":[16],"deployment":[17],"trade-offs":[18],"among":[19],"privacy,":[20],"user":[21],"experience,":[22],"and":[23,26,81,114,134,143,156],"energy":[24],"consumption,":[25],"most":[27],"remain":[28],"limited":[29],"to":[30,51,77,124],"closed-set":[31],"recognition":[32,66],"over":[33],"small,":[34],"pre-defined":[35],"vocabularies":[36],"of":[37,54,90,159,168],"words":[38,145],"sentences,":[40],"which":[41],"restricts":[42],"real-world":[43],"expressiveness.":[44],"In":[45],"this":[46],"paper,":[47],"we":[48],"present":[49],"Lip-Siri,":[50],"the":[52,57,166],"best":[53],"our":[55],"knowledge,":[56],"first":[58],"Wi-Fi":[59,95],"backscatter--based":[60],"SSI":[61],"that":[62],"supports":[63],"open-vocabulary":[64],"sentence":[65,127,163],"via":[67,111],"lexicon-guided":[68,118],"subword":[69],"decoding.":[70],"Lip-Siri":[71,149],"designs":[72],"a":[73,117,157],"frequency-shifted":[74],"backscatter":[75],"tag":[76],"isolate":[78],"tag-modulated":[79],"reflections":[80],"suppress":[82],"interference":[83],"from":[84,93],"non-target":[85],"motions,":[86],"enabling":[87],"reliable":[88],"extraction":[89],"lip-motion":[91,103],"traces":[92,101],"ubiquitous":[94],"signals.":[96],"We":[97,129],"then":[98],"segment":[99],"continuous":[100,162],"into":[102],"units,":[104],"cluster":[105],"them,":[106],"learn":[107],"robust":[108],"unit":[109],"representations":[110],"cluster-based":[112],"self-supervision,":[113],"finally":[115],"propose":[116],"Transformer":[119],"encoder--decoder":[120],"with":[121,137],"beam":[122],"search":[123],"decode":[125],"variable-length":[126],"sequences.":[128],"implement":[130],"an":[131],"end-to-end":[132],"prototype":[133],"evaluate":[135],"it":[136],"15":[138],"participants":[139],"on":[140,153,161],"340":[141],"sentences":[142],"3,398":[144],"across":[146],"multiple":[147],"scenarios.":[148],"achieves":[150],"85.61%":[151],"accuracy":[152],"word":[154],"prediction":[155],"WER":[158],"36.87%":[160],"recognition,":[164],"approaching":[165],"performance":[167],"representative":[169],"vision-based":[170],"lip-reading":[171],"systems.":[172]},"counts_by_year":[],"updated_date":"2026-01-28T23:18:48.515280","created_date":"2026-01-28T00:00:00"}
