{"id":"https://openalex.org/W2105343238","doi":"https://doi.org/10.1162/comj_a_00146","title":"A Shift-Invariant Latent Variable Model for Automatic Music Transcription","display_name":"A Shift-Invariant Latent Variable Model for Automatic Music Transcription","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2105343238","doi":"https://doi.org/10.1162/comj_a_00146","mag":"2105343238"},"language":"en","primary_location":{"id":"doi:10.1162/comj_a_00146","is_oa":false,"landing_page_url":"https://doi.org/10.1162/comj_a_00146","pdf_url":null,"source":{"id":"https://openalex.org/S165362224","display_name":"Computer Music Journal","issn_l":"0148-9267","issn":["0148-9267","1531-5169"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computer Music Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084672392","display_name":"Emmanouil Benetos","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":["Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. ,"],"affiliations":[{"raw_affiliation_string":"Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. ,","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077484271","display_name":"Simon Dixon","orcid":"https://orcid.org/0000-0002-6098-481X"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon Dixon","raw_affiliation_strings":["Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. ,","Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. emmanouilb@eecs.qmul.ac.uk simond@eecs.qmul.ac.u ...#TAB#"],"affiliations":[{"raw_affiliation_string":"Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. ,","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"Centre for Digital Music, School of Electronic Engineering and Computer Science, Queen Mary University of London, Mile End Road, London E1 4NS, UK. emmanouilb@eecs.qmul.ac.uk simond@eecs.qmul.ac.u ...#TAB#","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084672392"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":4.4591,"has_fulltext":false,"cited_by_count":72,"citation_normalized_percentile":{"value":0.96399014,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"36","issue":"4","first_page":"81","last_page":"94"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6969425678253174},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5978908538818359},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.5625022649765015},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.5083214640617371},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5066424012184143},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.49309441447257996},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.47862711548805237},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.45300236344337463},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.4265735149383545},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.4254884421825409},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4195975661277771},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39615464210510254},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.19965016841888428},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19445785880088806}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6969425678253174},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5978908538818359},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.5625022649765015},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.5083214640617371},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5066424012184143},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.49309441447257996},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.47862711548805237},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.45300236344337463},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.4265735149383545},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.4254884421825409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4195975661277771},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39615464210510254},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.19965016841888428},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19445785880088806},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/comj_a_00146","is_oa":false,"landing_page_url":"https://doi.org/10.1162/comj_a_00146","pdf_url":null,"source":{"id":"https://openalex.org/S165362224","display_name":"Computer Music Journal","issn_l":"0148-9267","issn":["0148-9267","1531-5169"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computer Music Journal","raw_type":"journal-article"},{"id":"pmh:oai:openaccess.city.ac.uk:2427","is_oa":false,"landing_page_url":"https://doi.org/10.1162/comj_a_00146>","pdf_url":null,"source":{"id":"https://openalex.org/S4306401940","display_name":"City Research Online (City University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I180825142","host_organization_name":"City, University of London","host_organization_lineage":["https://openalex.org/I180825142"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:muse.jhu.edu:/article/493760","is_oa":false,"landing_page_url":"https://muse.jhu.edu/article/493760","pdf_url":null,"source":{"id":"https://openalex.org/S4377196299","display_name":"Project Muse (Johns Hopkins University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145311948","host_organization_name":"Johns Hopkins University","host_organization_lineage":["https://openalex.org/I145311948"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W89231944","https://openalex.org/W129413713","https://openalex.org/W144105221","https://openalex.org/W1607142029","https://openalex.org/W1759597718","https://openalex.org/W1995458972","https://openalex.org/W2000911789","https://openalex.org/W2038761658","https://openalex.org/W2042659457","https://openalex.org/W2049633694","https://openalex.org/W2059443479","https://openalex.org/W2104299881","https://openalex.org/W2106733070","https://openalex.org/W2110007838","https://openalex.org/W2120160933","https://openalex.org/W2120809168","https://openalex.org/W2121148209","https://openalex.org/W2125722442","https://openalex.org/W2125838338","https://openalex.org/W2135151673","https://openalex.org/W2139122061","https://openalex.org/W2144414181","https://openalex.org/W2154611643","https://openalex.org/W2157313348","https://openalex.org/W2596136048","https://openalex.org/W3129711340","https://openalex.org/W3144915787","https://openalex.org/W4251142838","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W2011227383","https://openalex.org/W4375868962","https://openalex.org/W2088854863","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W2065606036","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W2016904525"],"abstract_inverted_index":{"In":[0,65],"this":[1,55],"work,":[2],"a":[3,47,129],"probabilistic":[4,18],"model":[5,14],"for":[6,26,51,61,76,86],"multiple-instrument":[7,62,122],"automatic":[8,63],"music":[9],"transcription":[10,117,151],"is":[11,24],"proposed.":[12],"The":[13,116],"extends":[15],"the":[16,32,67,71,126,134,143,150],"shift-invariant":[17,68],"latent":[19],"component":[20],"analysis":[21],"method,":[22],"which":[23],"used":[25,60],"spectrogram":[27],"factorization.":[28],"Proposed":[29],"extensions":[30],"support":[31],"use":[33],"of":[34,70],"multiple":[35],"spectral":[36],"templates":[37,104],"per":[38,41],"pitch":[39,49,88,103],"and":[40,80,93,133],"instrument":[42],"source,":[43],"as":[44,46,83,85],"well":[45,84],"time-varying":[48],"contribution":[50],"each":[52],"source.":[53],"Thus,":[54],"method":[56,72,145],"can":[57,73],"effectively":[58],"be":[59,74],"transcription.":[64],"addition,":[66],"aspect":[69],"exploited":[75],"detecting":[77],"tuning":[78],"changes":[79],"frequency":[81],"modulations,":[82],"visualizing":[87],"content.":[89],"For":[90,101],"note":[91,114],"tracking":[92],"smoothing,":[94],"pitch-wise":[95],"hidden":[96],"Markov":[97],"models":[98],"are":[99],"used.":[100],"training,":[102],"from":[105,125,149],"eight":[106],"orchestral":[107],"instruments":[108],"were":[109],"extracted,":[110],"covering":[111],"their":[112],"complete":[113],"range.":[115],"system":[118],"was":[119],"tested":[120],"on":[121],"polyphonic":[123],"recordings":[124],"RWC":[127],"database,":[128],"Disklavier":[130],"data":[131,138],"set,":[132],"MIREX":[135],"2007":[136],"multi-F0":[137],"set.":[139],"Results":[140],"demonstrate":[141],"that":[142],"proposed":[144],"outperforms":[146],"leading":[147],"approaches":[148],"literature,":[152],"using":[153],"several":[154],"error":[155],"metrics.":[156]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":11},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
