{"id":"https://openalex.org/W1581975840","doi":"https://doi.org/10.21437/interspeech.2012-217","title":"Automatic detection of high vocal effort in telephone speech","display_name":"Automatic detection of high vocal effort in telephone speech","publication_year":2012,"publication_date":"2012-09-09","ids":{"openalex":"https://openalex.org/W1581975840","doi":"https://doi.org/10.21437/interspeech.2012-217","mag":"1581975840"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2012-217","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-217","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/42e56d67-67b6-43dd-a1c0-ea228edb91a6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008322847","display_name":"Jouni Pohjalainen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jouni Pohjalainen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000571465","display_name":"Tuomo Raitio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tuomo Raitio","raw_affiliation_strings":["Helsinki University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076286418","display_name":"Hannu Pulakka","orcid":null},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Hannu Pulakka","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042186400","display_name":"Paavo Alku","orcid":"https://orcid.org/0000-0002-8173-9418"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Paavo Alku","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8846,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.78203043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"691","last_page":"694"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.8500510454177856},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8090434074401855},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7753919363021851},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.6374038457870483},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.6241152286529541},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.554596483707428},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48445701599121094},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44176778197288513},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.435903400182724},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.43526870012283325},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.42190372943878174},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4178856611251831},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.41393980383872986},{"id":"https://openalex.org/keywords/telephony","display_name":"Telephony","score":0.4106840491294861},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4059422016143799},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11433526873588562}],"concepts":[{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.8500510454177856},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8090434074401855},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7753919363021851},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.6374038457870483},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.6241152286529541},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.554596483707428},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48445701599121094},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44176778197288513},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.435903400182724},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.43526870012283325},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.42190372943878174},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4178856611251831},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.41393980383872986},{"id":"https://openalex.org/C195358072","wikidata":"https://www.wikidata.org/wiki/Q944584","display_name":"Telephony","level":2,"score":0.4106840491294861},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4059422016143799},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11433526873588562},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2012-217","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-217","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:ec_fundedresources/42e56d67-67b6-43dd-a1c0-ea228edb91a6","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/42e56d67-67b6-43dd-a1c0-ea228edb91a6","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:openaire/42e56d67-67b6-43dd-a1c0-ea228edb91a6","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/42e56d67-67b6-43dd-a1c0-ea228edb91a6","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Pohjalainen, J, Raitio, T, Pulakka, H & Alku, P 2012, Automatic Detection of High Vocal Effort in Telephone Speech. in INTERSPEECH 2012, 13th Annual Conference of the International Speech Communication Association : Portland, Oregon, USA, September 9-13, 2012. < http://www.isca-speech.org/archive/interspeech_2012 >","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/42e56d67-67b6-43dd-a1c0-ea228edb91a6","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/42e56d67-67b6-43dd-a1c0-ea228edb91a6","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Pohjalainen, J, Raitio, T, Pulakka, H & Alku, P 2012, Automatic Detection of High Vocal Effort in Telephone Speech. in INTERSPEECH 2012, 13th Annual Conference of the International Speech Communication Association : Portland, Oregon, USA, September 9-13, 2012. < http://www.isca-speech.org/archive/interspeech_2012 >","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W88864901","https://openalex.org/W177407077","https://openalex.org/W1625928172","https://openalex.org/W1966264494","https://openalex.org/W1968637674","https://openalex.org/W2004826498","https://openalex.org/W2013355849","https://openalex.org/W2019915615","https://openalex.org/W2069501481","https://openalex.org/W2086381991","https://openalex.org/W2096672501","https://openalex.org/W2122051577","https://openalex.org/W2135131618","https://openalex.org/W2165880886","https://openalex.org/W2166223208","https://openalex.org/W2183282870","https://openalex.org/W2399953370","https://openalex.org/W2401495198"],"related_works":["https://openalex.org/W1852231985","https://openalex.org/W2080325429","https://openalex.org/W2184306570","https://openalex.org/W2351647310","https://openalex.org/W2140099343","https://openalex.org/W2115277869","https://openalex.org/W1976952689","https://openalex.org/W1572861854","https://openalex.org/W2183208835","https://openalex.org/W2341426843"],"abstract_inverted_index":{"A":[0],"system":[1,15,50],"is":[2,16,37,51],"proposed":[3,49],"for":[4],"the":[5,28,34,62,67,74,78,81],"automatic":[6],"detection":[7],"of":[8,30,56],"high":[9],"vocal":[10],"effort":[11],"in":[12,33,80],"speech.":[13,25],"The":[14,48],"evaluated":[17],"using":[18],"both":[19,39],"PCMcoded":[20],"speech":[21],"and":[22,42],"AMR-coded":[23],"telephone":[24,35],"In":[26],"addition,":[27],"effect":[29],"far-end":[31],"noise":[32,46],"conditions":[36],"studied<br/>using":[38],"matched-condition":[40],"training":[41],"cases":[43],"with":[44],"additive":[45],"mismatch.":[47],"based":[52],"on":[53],"Bayesian":[54],"classification":[55],"mel-frequency":[57],"cepstral":[58],"feature":[59,64],"vectors.":[60],"Concerning":[61],"MFCC":[63],"extraction":[65],"process,":[66],"substitution<br/>of":[68],"a":[69],"spectrum":[70],"analysis":[71],"method":[72],"emphasizing":[73],"fine":[75],"structure":[76],"improves":[77],"results":[79],"noisy":[82],"cases.":[83]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
