{"id":"https://openalex.org/W2140831887","doi":"https://doi.org/10.1109/tcsvt.2008.2009262","title":"Visual Lip Activity Detection and Speaker Detection Using Mouth Region Intensities","display_name":"Visual Lip Activity Detection and Speaker Detection Using Mouth Region Intensities","publication_year":2008,"publication_date":"2008-12-10","ids":{"openalex":"https://openalex.org/W2140831887","doi":"https://doi.org/10.1109/tcsvt.2008.2009262","mag":"2140831887"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2008.2009262","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2008.2009262","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031761881","display_name":"Spyridon Siatras","orcid":null},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"S. Siatras","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034879808","display_name":"Nikos Nikolaidis","orcid":"https://orcid.org/0000-0003-1515-7986"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"N. Nikolaidis","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047365576","display_name":"Michail Krinidis","orcid":null},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"M. Krinidis","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061130224","display_name":"Ioannis Pitas","orcid":"https://orcid.org/0009-0006-7555-8641"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"I. Pitas","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki","institution_ids":["https://openalex.org/I21370196"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5031761881"],"corresponding_institution_ids":["https://openalex.org/I21370196"],"apc_list":null,"apc_paid":null,"fwci":1.1198,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.79509178,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"19","issue":"1","first_page":"133","last_page":"137"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7053176164627075},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6487982273101807},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5697348117828369},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5456122159957886},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5165570974349976},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.4986257553100586},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4747862219810486},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4720657169818878},{"id":"https://openalex.org/keywords/standard-deviation","display_name":"Standard deviation","score":0.44690555334091187},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4336586892604828},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.4164665937423706},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.28960904479026794},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18176251649856567}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7053176164627075},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6487982273101807},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5697348117828369},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5456122159957886},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5165570974349976},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4986257553100586},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4747862219810486},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4720657169818878},{"id":"https://openalex.org/C22679943","wikidata":"https://www.wikidata.org/wiki/Q159375","display_name":"Standard deviation","level":2,"score":0.44690555334091187},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4336586892604828},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.4164665937423706},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.28960904479026794},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18176251649856567},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2008.2009262","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2008.2009262","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W84174137","https://openalex.org/W1506558619","https://openalex.org/W1570419640","https://openalex.org/W1667165204","https://openalex.org/W1965392255","https://openalex.org/W2015394094","https://openalex.org/W2020586124","https://openalex.org/W2033966334","https://openalex.org/W2036488048","https://openalex.org/W2046245205","https://openalex.org/W2064347532","https://openalex.org/W2108891849","https://openalex.org/W2134636830","https://openalex.org/W2135463994","https://openalex.org/W2142518644","https://openalex.org/W2155471382","https://openalex.org/W2155948168","https://openalex.org/W2164637175","https://openalex.org/W2169570466","https://openalex.org/W2169896329","https://openalex.org/W2942228371","https://openalex.org/W3097096317","https://openalex.org/W6603469096","https://openalex.org/W6634092760","https://openalex.org/W6682661836","https://openalex.org/W6684878041"],"related_works":["https://openalex.org/W2085033728","https://openalex.org/W4285411112","https://openalex.org/W2171299904","https://openalex.org/W118630527","https://openalex.org/W4252337044","https://openalex.org/W1647606319","https://openalex.org/W38289631","https://openalex.org/W4390494008","https://openalex.org/W2922442631","https://openalex.org/W2053596378"],"abstract_inverted_index":{"In":[0],"this":[1,23,89],"letter,":[2],"we":[3,104],"introduce":[4],"a":[5,32,65,84,119],"novel":[6],"approach":[7],"for":[8,75,91],"lip":[9,107],"activity":[10,108],"detection":[11,29,109],"and":[12,34,49,98],"speaker":[13],"detection,":[14],"using":[15],"solely":[16],"visual":[17,73,77,96],"information.":[18],"The":[19],"main":[20],"idea":[21],"in":[22,82,100,111,118],"work":[24],"is":[25],"to":[26,31,113],"apply":[27],"signal":[28],"algorithms":[30],"simple":[33],"easily":[35],"extracted":[36],"feature":[37],"from":[38],"the":[39,45,53,61,92,106,115],"mouth":[40,62],"region.":[41],"We":[42,79],"argue":[43],"that":[44,60,87],"increased":[46],"average":[47],"value":[48],"standard":[50],"deviation":[51],"of":[52,55,64,95],"number":[54],"pixels":[56],"with":[57],"low":[58],"intensities":[59],"region":[63],"speaking":[66],"person":[67],"demonstrates":[68],"can":[69],"be":[70],"used":[71],"as":[72],"cues":[74],"detecting":[76],"speech.":[78],"then":[80],"proceed":[81],"deriving":[83],"statistical":[85],"algorithm":[86],"utilizes":[88],"fact":[90],"efficient":[93],"characterization":[94],"speech":[97],"silence":[99],"video":[101],"sequences.":[102],"Furthermore,":[103],"employ":[105],"method":[110],"order":[112],"determine":[114],"active":[116],"speaker(s)":[117],"multi-person":[120],"environment.":[121]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
