{"id":"https://openalex.org/W2156230116","doi":"https://doi.org/10.1109/icassp.2010.5495585","title":"Recognition of phonemes and words in singing","display_name":"Recognition of phonemes and words in singing","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2156230116","doi":"https://doi.org/10.1109/icassp.2010.5495585","mag":"2156230116"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2010.5495585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2010.5495585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079981416","display_name":"Annamaria Mesaros","orcid":"https://orcid.org/0000-0002-6640-9752"},"institutions":[{"id":"https://openalex.org/I4210133110","display_name":"Tampere University","ror":null,"country_code":"FI","type":null,"lineage":["https://openalex.org/I4210133110"]},{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Annamaria Mesaros","raw_affiliation_strings":["Department of Signal Processing, Tampere University of Technology, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing, Tampere University of Technology, Tampere, Finland","institution_ids":["https://openalex.org/I166825849","https://openalex.org/I4210133110"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049691461","display_name":"Tuomas Virtanen","orcid":"https://orcid.org/0000-0002-4604-9729"},"institutions":[{"id":"https://openalex.org/I4210133110","display_name":"Tampere University","ror":null,"country_code":"FI","type":null,"lineage":["https://openalex.org/I4210133110"]},{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Tuomas Virtanen","raw_affiliation_strings":["Department of Signal Processing, Tampere University of Technology, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing, Tampere University of Technology, Tampere, Finland","institution_ids":["https://openalex.org/I166825849","https://openalex.org/I4210133110"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5079981416"],"corresponding_institution_ids":["https://openalex.org/I166825849","https://openalex.org/I4210133110"],"apc_list":null,"apc_paid":null,"fwci":2.3274,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.90139363,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2146","last_page":"2149"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.9628844261169434},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.8840198516845703},{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.8530535697937012},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8231696486473083},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8057596683502197},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7703338861465454},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5526323914527893},{"id":"https://openalex.org/keywords/polyphony","display_name":"Polyphony","score":0.5354822874069214},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5318673849105835},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.500645637512207},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49383124709129333},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49349963665008545},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19433194398880005},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.06777602434158325}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.9628844261169434},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.8840198516845703},{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.8530535697937012},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8231696486473083},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8057596683502197},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7703338861465454},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5526323914527893},{"id":"https://openalex.org/C128979739","wikidata":"https://www.wikidata.org/wiki/Q179465","display_name":"Polyphony","level":2,"score":0.5354822874069214},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5318673849105835},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.500645637512207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49383124709129333},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49349963665008545},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19433194398880005},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06777602434158325},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp.2010.5495585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2010.5495585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.722.1941","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.722.1941","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.tut.fi/%7Emesaros/pubs/singrec.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W182406043","https://openalex.org/W182831726","https://openalex.org/W2007431733","https://openalex.org/W2121238885","https://openalex.org/W2144731719","https://openalex.org/W2146871184","https://openalex.org/W2164282073","https://openalex.org/W2168403679","https://openalex.org/W2398871418","https://openalex.org/W2403272360","https://openalex.org/W4285719527","https://openalex.org/W6607423309","https://openalex.org/W6607467106","https://openalex.org/W6677693164","https://openalex.org/W6681443795","https://openalex.org/W6712736788","https://openalex.org/W6713555227"],"related_works":["https://openalex.org/W2105076537","https://openalex.org/W2002221802","https://openalex.org/W2041167939","https://openalex.org/W2250909759","https://openalex.org/W2020757772","https://openalex.org/W2562995433","https://openalex.org/W1500873938","https://openalex.org/W1666710534","https://openalex.org/W2223833155","https://openalex.org/W2028371633"],"abstract_inverted_index":{"This":[0],"paper":[1],"studies":[2],"the":[3,10,45,78,109,116,125,128,136,141,146,154,164,173],"influence":[4],"of":[5,12,108,127,172],"n-gram":[6],"language":[7,23,35,86,98],"models":[8,24,61],"in":[9,113,131,148,170],"recognition":[11,46,80,129,156,162],"sung":[13],"phonemes":[14,26],"and":[15,21,27,29,67,91],"words.":[16,32],"We":[17,123],"train":[18],"uni-,":[19],"bi-,":[20],"trigram":[22],"for":[25,31,145],"bi-":[28],"trigrams":[30],"The":[33,60],"word-level":[34],"model":[36,52],"is":[37,168],"estimated":[38],"from":[39,72,83,95],"a":[40,49,132,149],"textual":[41],"lyrics":[42,151],"database.":[43,152],"In":[44,103],"we":[47,139],"use":[48,126],"hidden":[50],"Markov":[51],"based":[53],"phonetic":[54],"recognizer":[55],"adapted":[56],"to":[57,88,100],"singing":[58,66,77],"voice.":[59],"were":[62,111],"tested":[63],"on":[64,68,92,120],"monophonic":[65],"vocal":[69],"lines":[70],"separated":[71],"polyphonic":[73,93,121],"music.":[74,122],"On":[75],"clean":[76,114],"phoneme":[79],"accuracies":[81],"varied":[82],"20%":[84,101],"(no":[85,97],"model)":[87,99],"39%":[89],"(bigram)":[90],"music":[94],"6%":[96],"(bigram).":[102],"word":[104,155],"recognition,":[105],"one":[106],"fifth":[107],"words":[110],"recognized":[112,137],"singing,":[115],"performance":[117],"being":[118],"lower":[119],"study":[124],"results":[130],"query-by-singing":[133],"application.":[134],"Using":[135],"words,":[138],"retrieve":[140],"songs":[142],"by":[143],"searching":[144],"text":[147,150],"For":[153],"system":[157],"having":[158],"only":[159],"24%":[160],"correct":[161,169],"rate,":[163],"first":[165],"retrieved":[166],"song":[167],"57%":[171],"test":[174],"cases.":[175]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
