{"id":"https://openalex.org/W2034356580","doi":"https://doi.org/10.1109/jstsp.2012.2237379","title":"Audiovisual Voice Activity Detection Based on Microphone Arrays and Color Information","display_name":"Audiovisual Voice Activity Detection Based on Microphone Arrays and Color Information","publication_year":2013,"publication_date":"2013-01-01","ids":{"openalex":"https://openalex.org/W2034356580","doi":"https://doi.org/10.1109/jstsp.2012.2237379","mag":"2034356580"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2012.2237379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2012.2237379","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032058419","display_name":"Vicente Peruffo Minotto","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Vicente P. Minotto","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","institution_ids":["https://openalex.org/I130442723"]},{"raw_affiliation_string":"Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041350496","display_name":"Carlos B. O. Lopes","orcid":"https://orcid.org/0009-0004-4559-0099"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Carlos B. O. Lopes","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","institution_ids":["https://openalex.org/I130442723"]},{"raw_affiliation_string":"Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059279422","display_name":"Jacob Scharcanski","orcid":"https://orcid.org/0000-0002-9223-4693"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Jacob Scharcanski","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","institution_ids":["https://openalex.org/I130442723"]},{"raw_affiliation_string":"Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012781859","display_name":"Cl\u00e1udio R. Jung","orcid":"https://orcid.org/0000-0002-4711-5783"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Claudio R. Jung","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal do Rio Grande do Sul, Rio Grande do Sul, Brazil","institution_ids":["https://openalex.org/I130442723"]},{"raw_affiliation_string":"Inst. de Inf., Univ. Fed. do Rio Grande do Sul, Porto Alegre, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089545632","display_name":"Bowon Lee","orcid":"https://orcid.org/0000-0001-5417-5699"},"institutions":[{"id":"https://openalex.org/I1324840837","display_name":"Hewlett-Packard (United States)","ror":"https://ror.org/059rn9488","country_code":"US","type":"company","lineage":["https://openalex.org/I1324840837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bowon Lee","raw_affiliation_strings":["Hewlett Packard Laboratories, Palo Alto, CA, USA","Hewlett\u2013Packard Laboratories, Palo Alto, CA, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Hewlett Packard Laboratories, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1324840837"]},{"raw_affiliation_string":"Hewlett\u2013Packard Laboratories, Palo Alto, CA, USA#TAB#","institution_ids":["https://openalex.org/I1324840837"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032058419"],"corresponding_institution_ids":["https://openalex.org/I130442723"],"apc_list":null,"apc_paid":null,"fwci":4.4689,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.94863535,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"7","issue":"1","first_page":"147","last_page":"156"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8395607471466064},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6416712999343872},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6410199999809265},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6403084397315979},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6035448908805847},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5420322418212891},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5016880035400391},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.49412110447883606},{"id":"https://openalex.org/keywords/teleconference","display_name":"Teleconference","score":0.4600081741809845},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.42893174290657043},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.42840084433555603},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4268595576286316},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4253511428833008},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37201759219169617},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.32285135984420776},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.28841856122016907},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.27132901549339294},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.1514519453048706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8395607471466064},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6416712999343872},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6410199999809265},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6403084397315979},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6035448908805847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5420322418212891},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5016880035400391},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.49412110447883606},{"id":"https://openalex.org/C126669455","wikidata":"https://www.wikidata.org/wiki/Q1630941","display_name":"Teleconference","level":2,"score":0.4600081741809845},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.42893174290657043},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.42840084433555603},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4268595576286316},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4253511428833008},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37201759219169617},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.32285135984420776},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.28841856122016907},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.27132901549339294},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.1514519453048706},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2012.2237379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2012.2237379","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W705233","https://openalex.org/W117026553","https://openalex.org/W1514579074","https://openalex.org/W1538393083","https://openalex.org/W1630964756","https://openalex.org/W1881647329","https://openalex.org/W1964721292","https://openalex.org/W1973067693","https://openalex.org/W1979711143","https://openalex.org/W1981673559","https://openalex.org/W2001111768","https://openalex.org/W2002979515","https://openalex.org/W2015245929","https://openalex.org/W2053101950","https://openalex.org/W2059759076","https://openalex.org/W2065257586","https://openalex.org/W2089103658","https://openalex.org/W2095516890","https://openalex.org/W2100555417","https://openalex.org/W2114661268","https://openalex.org/W2116529913","https://openalex.org/W2122410182","https://openalex.org/W2125055259","https://openalex.org/W2125838338","https://openalex.org/W2125993116","https://openalex.org/W2129120544","https://openalex.org/W2132166479","https://openalex.org/W2133059825","https://openalex.org/W2133990480","https://openalex.org/W2138414774","https://openalex.org/W2139531267","https://openalex.org/W2142623206","https://openalex.org/W2145310314","https://openalex.org/W2152558472","https://openalex.org/W2157363431","https://openalex.org/W2163839957","https://openalex.org/W2164637175","https://openalex.org/W2165630397","https://openalex.org/W2167206042","https://openalex.org/W2400967734","https://openalex.org/W2623293810","https://openalex.org/W2912391306","https://openalex.org/W3146030285","https://openalex.org/W4298266977","https://openalex.org/W4312960235","https://openalex.org/W6630633036","https://openalex.org/W6636614058","https://openalex.org/W6639409548","https://openalex.org/W6677131165","https://openalex.org/W6680550258","https://openalex.org/W6681393664","https://openalex.org/W6683989643","https://openalex.org/W6712911086"],"related_works":["https://openalex.org/W2031844006","https://openalex.org/W2122030153","https://openalex.org/W2098934641","https://openalex.org/W2162449135","https://openalex.org/W1996547972","https://openalex.org/W2227972867","https://openalex.org/W2139941521","https://openalex.org/W2363866404","https://openalex.org/W9205999","https://openalex.org/W2392717295"],"abstract_inverted_index":{"Audiovisual":[0],"voice":[1,40],"activity":[2,41],"detection":[3,42],"is":[4,53,109,131,142,167],"a":[5,26,67,100,126,163],"necessary":[6],"stage":[7],"in":[8,48,62],"several":[9],"problems,":[10],"such":[11],"as":[12,45],"advanced":[13],"teleconferencing,":[14],"speech":[15,124],"recognition,":[16],"and":[17,22,61,72,89,94,138,177],"human-computer":[18],"interaction.":[19],"Lip":[20,51,107],"motion":[21,52,108],"audio":[23],"analysis":[24],"provide":[25],"large":[27],"amount":[28],"of":[29,122,136,153,159,173],"information":[30,130],"that":[31,117,183],"can":[32],"be":[33],"integrated":[34],"to":[35,81,144,169,193],"produce":[36],"more":[37],"robust":[38],"audiovisual":[39,186],"(VAD)":[43],"schemes,":[44],"we":[46,65],"discuss":[47],"this":[49,63],"paper.":[50],"very":[54],"useful":[55],"for":[56,70,86],"detecting":[57],"the":[58,76,83,90,104,119,139,157,160,171,184],"active":[59,123],"speaker,":[60],"paper":[64],"introduce":[66],"new":[68],"approach":[69,102,166,187],"lips":[71],"visual":[73],"VAD.":[74],"First,":[75],"algorithm":[77],"performs":[78],"skin":[79],"segmentation":[80],"reduce":[82],"search":[84],"area":[85],"lip":[87,93],"extraction,":[88],"most":[91],"likely":[92,120],"non-lip":[95],"regions":[96],"are":[97],"detected":[98,111],"using":[99,112],"Bayesian":[101],"within":[103,125],"delimited":[105],"area.":[106],"then":[110],"Hidden":[113],"Markov":[114],"Models":[115],"(HMMs)":[116],"estimate":[118],"occurrence":[121],"temporal":[127],"window.":[128],"Audio":[129],"captured":[132],"by":[133],"an":[134],"array":[135],"microphones,":[137],"sound-based":[140],"VAD":[141,195],"related":[143],"finding":[145],"spatio-temporally":[146],"coherent":[147],"sound":[148],"sources":[149],"through":[150],"another":[151],"set":[152],"HMMs.":[154],"To":[155],"increase":[156],"robustness":[158],"proposed":[161,185],"system,":[162],"late":[164],"fusion":[165],"employed":[168],"combine":[170],"result":[172],"each":[174],"modality":[175],"(audio":[176],"video).":[178],"Our":[179],"experimental":[180],"results":[181,190],"indicate":[182],"presents":[188],"better":[189],"when":[191],"compared":[192],"existing":[194],"algorithms.":[196]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
