{"id":"https://openalex.org/W7154949835","doi":"https://doi.org/10.48550/arxiv.2604.15718","title":"NeuroLip: An Event-driven Spatiotemporal Learning Framework for Cross-Scene Lip-Motion-based Visual Speaker Recognition","display_name":"NeuroLip: An Event-driven Spatiotemporal Learning Framework for Cross-Scene Lip-Motion-based Visual Speaker Recognition","publication_year":2026,"publication_date":"2026-04-17","ids":{"openalex":"https://openalex.org/W7154949835","doi":"https://doi.org/10.48550/arxiv.2604.15718"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15718","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134064664","display_name":"Junguang Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Junguang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000387478","display_name":"Wenye Liu","orcid":"https://orcid.org/0000-0003-4590-5367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wenye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024072796","display_name":"Stjepan Picek","orcid":"https://orcid.org/0000-0001-7509-4337"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Picek, Stjepan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101821500","display_name":"Yue Zheng","orcid":"https://orcid.org/0000-0002-8729-9863"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.0008999999845400453,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6189000010490417},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4810999929904938},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.43810001015663147},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.41620001196861267},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.3822000026702881},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.37720000743865967},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3765000104904175},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.37400001287460327}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7861999869346619},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6565999984741211},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6189000010490417},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4810999929904938},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4758000075817108},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.43810001015663147},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.41620001196861267},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3822000026702881},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.36899998784065247},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3499999940395355},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3433000147342682},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3172000050544739},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C2777708103","wikidata":"https://www.wikidata.org/wiki/Q852589","display_name":"Motion blur","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7079131603240967}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"speaker":[1],"recognition":[2,114],"based":[3],"on":[4,31,207],"lip":[5,34,80,96],"motion":[6,35,68,81,151],"offers":[7],"a":[8,99,109,125,174],"silent,":[9],"hands-free,":[10],"and":[11,45,70,82,120,153,187,199,210,226],"behavior-driven":[12],"biometric":[13],"solution":[14],"that":[15,28,93,139,193],"remains":[16],"effective":[17],"even":[18],"when":[19],"acoustic":[20],"cues":[21,162],"are":[22,228],"unavailable.":[23],"Compared":[24],"to":[25,67,117,159],"traditional":[26],"methods":[27,219],"rely":[29],"heavily":[30],"appearance-dependent":[32],"representations,":[33],"encodes":[36],"subject-specific":[37],"behavioral":[38,142],"dynamics":[39,59,97],"driven":[40],"by":[41,144,220],"consistent":[42],"articulation":[43],"patterns":[44,143],"muscle":[46],"coordination,":[47],"offering":[48],"inherent":[49],"stability":[50,78],"across":[51],"environmental":[52],"changes.":[53],"However,":[54],"capturing":[55],"these":[56,84],"robust,":[57],"fine-grained":[58,95],"is":[60,106],"challenging":[61],"for":[62],"conventional":[63],"frame-based":[64],"cameras":[65],"due":[66],"blur":[69],"low":[71],"dynamic":[72],"range.":[73],"To":[74,167],"exploit":[75],"the":[76],"intrinsic":[77],"of":[79],"address":[83],"sensing":[85],"limitations,":[86],"we":[87,171],"propose":[88],"NeuroLip,":[89],"an":[90],"event-based":[91,176],"framework":[92],"captures":[94],"under":[98,108,183,213],"strict":[100],"yet":[101],"practical":[102],"cross-scene":[103,201],"protocol:":[104],"training":[105],"performed":[107],"single":[110],"controlled":[111],"condition,":[112],"while":[113,147],"must":[115],"generalize":[116],"unseen":[118,208],"viewing":[119],"lighting":[121],"conditions.":[122],"NeuroLip":[123,194],"features":[124],"1)":[126],"Temporal-aware":[127],"Voxel":[128],"Encoding":[129],"module":[130],"with":[131],"adaptive":[132],"event":[133,165],"weighting,":[134],"2)":[135],"Structure-aware":[136],"Spatial":[137],"Enhancer":[138],"amplifies":[140],"discriminative":[141],"suppressing":[145],"noise":[146],"preserving":[148],"vertically":[149],"structured":[150],"information,":[152],"3)":[154],"Polarity":[155],"Consistency":[156],"Regularization":[157],"mechanism":[158],"retain":[160],"motion-direction":[161],"encoded":[163],"in":[164],"polarities.":[166],"facilitate":[168],"systematic":[169],"evaluation,":[170],"introduce":[172],"DVSpeaker,":[173],"comprehensive":[175],"lip-motion":[177],"dataset":[178,225],"comprising":[179],"50":[180],"subjects":[181],"recorded":[182],"four":[184],"distinct":[185],"viewpoint":[186],"illumination":[188],"scenarios.":[189],"Extensive":[190],"experiments":[191],"demonstrate":[192],"achieves":[195],"near-perfect":[196],"matched-scene":[197],"accuracy":[198,206],"robust":[200],"generalization,":[202],"attaining":[203],"over":[204],"71%":[205],"viewpoints":[209],"nearly":[211],"76%":[212],"low-light":[214],"conditions,":[215],"outperforming":[216],"representative":[217],"existing":[218],"at":[221,231],"least":[222],"8.54%.":[223],"The":[224],"code":[227],"publicly":[229],"available":[230],"https://github.com/JiuZeongit/NeuroLip.":[232]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
