{"id":"https://openalex.org/W45820414","doi":"https://doi.org/10.21437/interspeech.2013-59","title":"Detection of nonverbal vocalizations using Gaussian mixture models: looking for fillers and laughter in conversational speech","display_name":"Detection of nonverbal vocalizations using Gaussian mixture models: looking for fillers and laughter in conversational speech","publication_year":2013,"publication_date":"2013-08-25","ids":{"openalex":"https://openalex.org/W45820414","doi":"https://doi.org/10.21437/interspeech.2013-59","mag":"45820414"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2013-59","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2013-59","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2013","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ris.utwente.nl/ws/files/5340172/2013_krikke_truong.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053068033","display_name":"Teun Krikke","orcid":null},"institutions":[{"id":"https://openalex.org/I94624287","display_name":"University of Twente","ror":"https://ror.org/006hf6230","country_code":"NL","type":"education","lineage":["https://openalex.org/I94624287"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Teun F. Krikke","raw_affiliation_strings":["Human Media Interaction, University of Twente Enschede, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Human Media Interaction, University of Twente Enschede, The Netherlands","institution_ids":["https://openalex.org/I94624287"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068294985","display_name":"Khiet P. Truong","orcid":"https://orcid.org/0000-0002-7243-0523"},"institutions":[{"id":"https://openalex.org/I94624287","display_name":"University of Twente","ror":"https://ror.org/006hf6230","country_code":"NL","type":"education","lineage":["https://openalex.org/I94624287"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Khiet P. Truong","raw_affiliation_strings":["Univ. of Twente#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Univ. of Twente#TAB#","institution_ids":["https://openalex.org/I94624287"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2545,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.87528868,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"163","last_page":"167"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/laughter","display_name":"Laughter","score":0.874360203742981},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7023247480392456},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6987762451171875},{"id":"https://openalex.org/keywords/nonverbal-communication","display_name":"Nonverbal communication","score":0.5540103316307068},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.5425681471824646},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.53917396068573},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.5299105644226074},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5023882389068604},{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.48780038952827454},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.45324790477752686},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.42979976534843445},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38212883472442627},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38129493594169617},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.2668113112449646},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12866970896720886},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.09880796074867249}],"concepts":[{"id":"https://openalex.org/C2780775679","wikidata":"https://www.wikidata.org/wiki/Q170579","display_name":"Laughter","level":2,"score":0.874360203742981},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7023247480392456},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6987762451171875},{"id":"https://openalex.org/C145633318","wikidata":"https://www.wikidata.org/wiki/Q207125","display_name":"Nonverbal communication","level":2,"score":0.5540103316307068},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.5425681471824646},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.53917396068573},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.5299105644226074},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5023882389068604},{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.48780038952827454},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.45324790477752686},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.42979976534843445},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38212883472442627},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38129493594169617},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2668113112449646},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12866970896720886},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.09880796074867249},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2013-59","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2013-59","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2013","raw_type":"proceedings-article"},{"id":"pmh:oai:ris.utwente.nl:publications/9cd90f29-52cc-4a5f-b87f-eb7f467da4cd","is_oa":true,"landing_page_url":"https://research.utwente.nl/en/publications/9cd90f29-52cc-4a5f-b87f-eb7f467da4cd","pdf_url":"https://ris.utwente.nl/ws/files/5340172/2013_krikke_truong.pdf","source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Krikke, T F & Truong, K P 2013, Detection of nonverbal vocalizations using Gaussian Mixture Models: looking for fillers and laughter in conversational speech. in Proceedings of the 14th Annual Conference of the International Speech Communication Association, Interspeech 2013. International Speech Communication Association, Baixas, Framce, pp. 163-167, 14th Annual Conference of the International Speech Communication Association, INTERSPEECH 2013, Lyon, France, 25/08/13. < http://www.isca-speech.org/archive/interspeech_2013/i13_0163.html >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:ris.utwente.nl:publications/9cd90f29-52cc-4a5f-b87f-eb7f467da4cd","is_oa":false,"landing_page_url":"http://www.isca-speech.org/archive/interspeech_2013/i13_0163.html","pdf_url":null,"source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:ris.utwente.nl:publications/9cd90f29-52cc-4a5f-b87f-eb7f467da4cd","is_oa":true,"landing_page_url":"https://research.utwente.nl/en/publications/9cd90f29-52cc-4a5f-b87f-eb7f467da4cd","pdf_url":"https://ris.utwente.nl/ws/files/5340172/2013_krikke_truong.pdf","source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Krikke, T F & Truong, K P 2013, Detection of nonverbal vocalizations using Gaussian Mixture Models: looking for fillers and laughter in conversational speech. in Proceedings of the 14th Annual Conference of the International Speech Communication Association, Interspeech 2013. International Speech Communication Association, Baixas, Framce, pp. 163-167, 14th Annual Conference of the International Speech Communication Association, INTERSPEECH 2013, Lyon, France, 25/08/13. < http://www.isca-speech.org/archive/interspeech_2013/i13_0163.html >","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W45820414.pdf","grobid_xml":"https://content.openalex.org/works/W45820414.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W152150158","https://openalex.org/W198720948","https://openalex.org/W206810795","https://openalex.org/W209307208","https://openalex.org/W1503417225","https://openalex.org/W1524423290","https://openalex.org/W1591747970","https://openalex.org/W1875231349","https://openalex.org/W2044315228","https://openalex.org/W2058941224","https://openalex.org/W2082054210","https://openalex.org/W2082389970","https://openalex.org/W2134473538","https://openalex.org/W2139197656","https://openalex.org/W2144005487","https://openalex.org/W2168524283","https://openalex.org/W2288575317","https://openalex.org/W2917076449","https://openalex.org/W4233494366"],"related_works":["https://openalex.org/W2100012411","https://openalex.org/W1482212662","https://openalex.org/W3162157266","https://openalex.org/W2162084437","https://openalex.org/W1997579527","https://openalex.org/W3044927199","https://openalex.org/W2102353451","https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097"],"abstract_inverted_index":{"In":[0,95],"this":[1,138],"paper,":[2],"we":[3,32,115],"analyze":[4],"acoustic":[5],"profiles":[6],"of":[7,27,43,53,71,113,137],"fillers":[8],"(i.e.":[9],"filled":[10],"pauses,":[11],"FPs)":[12],"and":[13,45,68,93,130],"laughter":[14,44],"with":[15,64],"the":[16,39,51,54,99,103,135],"aim":[17],"to":[18,37,49,87,97,120],"automatically":[19],"localize":[20],"these":[21,121],"nonverbal":[22],"vocalizations":[23],"in":[24,85,102,111],"a":[25,117],"stream":[26],"audio.":[28],"Among":[29],"other":[30,88],"features,":[31],"use":[33],"voice":[34],"quality":[35],"features":[36,72,89],"capture":[38,50],"distinctive":[40],"production":[41],"modes":[42],"spectral":[46],"similarity":[47],"measures":[48],"stability":[52],"oral":[55],"tract":[56],"that":[57,77],"is":[58],"characteristic":[59],"for":[60,90],"FPs.":[61],"Classification":[62],"experiments":[63],"Gaussian":[65],"Mixture":[66],"Models":[67],"various":[69],"sets":[70],"are":[73,81,132],"performed.":[74],"We":[75],"find":[76],"Mel-Frequency":[78],"Cepstrum":[79],"Coefficients":[80],"performing":[82],"relatively":[83],"well":[84],"comparison":[86],"both":[91],"FPs":[92],"laughter.":[94],"order":[96],"address":[98],"large":[100,125],"variation":[101],"frame-wise":[104],"decision":[105],"scores":[106],"(e.g.,":[107],"log-likelihood":[108],"ratios)":[109],"observed":[110],"sequences":[112],"frames":[114],"apply":[116],"median":[118],"filter":[119],"scores,":[122],"which":[123],"yields":[124],"performance":[126],"improvements.":[127],"Our":[128],"analyses":[129],"results":[131],"presented":[133],"within":[134],"framework":[136],"year\u2019s":[139],"Interspeech":[140],"Computational":[141],"Paralinguistics":[142],"sub-Challenge":[143],"on":[144],"Social":[145],"Signals.":[146]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
