{"id":"https://openalex.org/W1985350076","doi":"https://doi.org/10.1109/slt.2012.6424264","title":"Robust detection of voiced segments in samples of everyday conversations using unsupervised HMMS","display_name":"Robust detection of voiced segments in samples of everyday conversations using unsupervised HMMS","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W1985350076","doi":"https://doi.org/10.1109/slt.2012.6424264","mag":"1985350076","pmid":"https://pubmed.ncbi.nlm.nih.gov/33644784"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2012.6424264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7909075","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038300369","display_name":"Meysam Asgari","orcid":"https://orcid.org/0000-0002-7651-8215"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Meysam Asgari","raw_affiliation_strings":["Center for Spoken Language Understanding, OHSU, Portland, OR, USA"],"affiliations":[{"raw_affiliation_string":"Center for Spoken Language Understanding, OHSU, Portland, OR, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025419994","display_name":"Izhak Shafran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Izhak Shafran","raw_affiliation_strings":["Center for Spoken Language Understanding, OHSU, Portland, OR, USA"],"affiliations":[{"raw_affiliation_string":"Center for Spoken Language Understanding, OHSU, Portland, OR, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057153459","display_name":"Alireza Bayestehtashk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alireza Bayestehtashk","raw_affiliation_strings":["Center for Spoken Language Understanding, OHSU, Portland, OR, USA"],"affiliations":[{"raw_affiliation_string":"Center for Spoken Language Understanding, OHSU, Portland, OR, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038300369"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4864,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.84844273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"2012","issue":null,"first_page":"438","last_page":"442"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7227290868759155},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.710864782333374},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6962490081787109},{"id":"https://openalex.org/keywords/voice","display_name":"Voice","score":0.6670743823051453},{"id":"https://openalex.org/keywords/maximum-a-posteriori-estimation","display_name":"Maximum a posteriori estimation","score":0.5923476815223694},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5691732168197632},{"id":"https://openalex.org/keywords/harmonic","display_name":"Harmonic","score":0.5403215885162354},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47525832056999207},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44035714864730835},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.4348953366279602},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.42029958963394165},{"id":"https://openalex.org/keywords/maximum-likelihood","display_name":"Maximum likelihood","score":0.3666386902332306},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21583911776542664},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.16153207421302795},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09833434224128723}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7227290868759155},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.710864782333374},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6962490081787109},{"id":"https://openalex.org/C552089266","wikidata":"https://www.wikidata.org/wiki/Q494510","display_name":"Voice","level":2,"score":0.6670743823051453},{"id":"https://openalex.org/C9810830","wikidata":"https://www.wikidata.org/wiki/Q635384","display_name":"Maximum a posteriori estimation","level":3,"score":0.5923476815223694},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5691732168197632},{"id":"https://openalex.org/C127934551","wikidata":"https://www.wikidata.org/wiki/Q1148098","display_name":"Harmonic","level":2,"score":0.5403215885162354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47525832056999207},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44035714864730835},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.4348953366279602},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.42029958963394165},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.3666386902332306},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21583911776542664},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.16153207421302795},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09833434224128723},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/slt.2012.6424264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},{"id":"pmid:33644784","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33644784","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SLT ... : ... IEEE Workshop on Spoken Language Technology : proceedings. IEEE Workshop on Spoken Language Technology","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.386.1387","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.386.1387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cslu.ogi.edu/~zak/is_slt12.pdf","raw_type":"text"},{"id":"pmh:oai:pubmedcentral.nih.gov:7909075","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7909075","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"SLT Workshop Spok Lang Technol","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:7909075","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7909075","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"SLT Workshop Spok Lang Technol","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W23669922","https://openalex.org/W95551363","https://openalex.org/W2040298897","https://openalex.org/W2119535101","https://openalex.org/W2122725421","https://openalex.org/W2134025364","https://openalex.org/W2142040441","https://openalex.org/W2142773518","https://openalex.org/W2151484683","https://openalex.org/W2151572393","https://openalex.org/W2792905612","https://openalex.org/W3152128808","https://openalex.org/W6681167335","https://openalex.org/W6682194319"],"related_works":["https://openalex.org/W1595191759","https://openalex.org/W1503532423","https://openalex.org/W2010299594","https://openalex.org/W1967494390","https://openalex.org/W3013496002","https://openalex.org/W2465363361","https://openalex.org/W2896820906","https://openalex.org/W2101542441","https://openalex.org/W4284711868","https://openalex.org/W2283950057"],"abstract_inverted_index":{"We":[0,131,175,188],"investigate":[1],"methods":[2],"for":[3,31,46,99],"detecting":[4,47,197],"voiced":[5,48,63,76],"segments":[6,77],"in":[7,72,220],"everyday":[8,204],"conversations":[9],"from":[10,138,170],"ambient":[11],"recordings.":[12],"Such":[13],"recordings":[14],"contain":[15],"high":[16],"diversity":[17],"of":[18,57,93,110,116,135,146,159,182,186,196,203],"background":[19],"noise,":[20],"making":[21],"it":[22],"difficult":[23],"or":[24],"infeasible":[25],"to":[26,54,167],"collect":[27],"representative":[28],"labelled":[29],"samples":[30],"estimating":[32],"noise-specific":[33],"HMM":[34,148],"models.":[35],"The":[36,156],"popular":[37,221],"utility":[38],"<i>get-f0</i>":[39],"and":[40,149,206],"its":[41],"derivatives":[42],"compute":[43],"normalized":[44],"cross-correlation":[45],"segments,":[49],"which":[50,82],"unfortunately":[51],"is":[52,65,163],"sensitive":[53],"different":[55],"types":[56],"noise.":[58,174],"Exploiting":[59],"the":[60,91,94,108,120,133,139,160,183],"fact":[61],"that":[62,164,208],"speech":[64,151,169,198,205],"not":[66],"just":[67],"periodic":[68],"but":[69],"also":[70],"rich":[71],"harmonic,":[73],"we":[74,118],"model":[75,95,121,162],"by":[78,179],"adopting":[79],"harmonic":[80,111,140,161,173],"models,":[81],"have":[83],"recently":[84],"gained":[85],"considerable":[86],"attention.":[87],"In":[88],"previous":[89],"work,":[90],"parameters":[92,122],"were":[96],"estimated":[97],"independently":[98],"each":[100],"frame":[101],"using":[102,125,152],"maximum":[103,127],"likelihood":[104,134],"criterion.":[105,130],"However,":[106],"since":[107],"distribution":[109],"coefficients":[112],"depend":[113],"on":[114,193,199],"articulators":[115],"speakers,":[117],"estimate":[119],"more":[123],"robustly":[124],"a":[126,194,200],"<i>a":[128],"posteriori</i>":[129],"use":[132],"voicing,":[136],"computed":[137],"model,":[141],"as":[142],"an":[143,147],"observation":[144],"probability":[145],"detect":[150],"this":[153,177],"unsupervised":[154],"HMM.":[155],"one":[157],"caveat":[158],"they":[165],"fail":[166],"distinguish":[168],"other":[171],"stationary":[172],"rectify":[176],"weakness":[178],"taking":[180],"advantage":[181],"non-stationary":[184],"property":[185],"speech.":[187],"evaluate":[189],"our":[190],"models":[191,210],"empirically":[192],"task":[195],"large":[201],"corpora":[202],"demonstrate":[207],"these":[209],"perform":[211],"significantly":[212],"better":[213],"than":[214],"standard":[215],"voice":[216],"detection":[217],"algorithm":[218],"employed":[219],"tools.":[222]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
