{"id":"https://openalex.org/W3094346571","doi":"https://doi.org/10.1109/icpr48806.2021.9412155","title":"The Effect of Spectrogram Reconstruction on Automatic Music Transcription: An Alternative Approach to Improve Transcription Accuracy","display_name":"The Effect of Spectrogram Reconstruction on Automatic Music Transcription: An Alternative Approach to Improve Transcription Accuracy","publication_year":2021,"publication_date":"2021-01-10","ids":{"openalex":"https://openalex.org/W3094346571","doi":"https://doi.org/10.1109/icpr48806.2021.9412155","mag":"3094346571"},"language":"en","primary_location":{"id":"doi:10.1109/icpr48806.2021.9412155","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412155","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2010.09969","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069612434","display_name":"Kin Wai Cheuk","orcid":"https://orcid.org/0000-0003-3213-8242"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kin Wai Cheuk","raw_affiliation_strings":["Institute of High Performance Computing, Agency for Science, Technology and Research","Institute of High Performance Computing, Agency for Science Technology and Research"],"affiliations":[{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029344951","display_name":"Yin-Jyun Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yin-Jvun Luo","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London","School of Electronic Engineering and Computer Science Queen Mary, University of London"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science Queen Mary, University of London","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084672392","display_name":"Emmanouil Benetos","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London","School of Electronic Engineering and Computer Science Queen Mary, University of London"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science Queen Mary, University of London","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069548004","display_name":"Dorien Herremans","orcid":"https://orcid.org/0000-0001-8607-1640"},"institutions":[{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dorien Herremans","raw_affiliation_strings":["Institute of High Performance Computing, Agency for Science, Technology and Research","Institute of High Performance Computing, Agency for Science Technology and Research"],"affiliations":[{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069612434"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I3004594783"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00601926,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9091","last_page":"9098"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9415115118026733},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.7438485622406006},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6575068235397339},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.5901865363121033},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5023765563964844},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4683762192726135},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46442925930023193},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34185734391212463},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33233368396759033}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9415115118026733},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.7438485622406006},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6575068235397339},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.5901865363121033},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5023765563964844},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4683762192726135},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46442925930023193},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34185734391212463},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33233368396759033},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icpr48806.2021.9412155","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412155","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2010.09969","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.09969","pdf_url":"https://arxiv.org/pdf/2010.09969","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3094346571","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2010.09969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2010.09969","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2010.09969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2010.09969","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.09969","pdf_url":"https://arxiv.org/pdf/2010.09969","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4866931537","display_name":null,"funder_award_id":"MOE2018-T2-2-161,SRG ISTD 2017 129","funder_id":"https://openalex.org/F4320311039","funder_display_name":"Ministry of Higher Education and Scientific Research"},{"id":"https://openalex.org/G6205196381","display_name":null,"funder_award_id":"SING-2018-02-0204","funder_id":"https://openalex.org/F4320320696","funder_display_name":"Agency for Science, Technology and Research"}],"funders":[{"id":"https://openalex.org/F4320311039","display_name":"Ministry of Higher Education and Scientific Research","ror":"https://ror.org/00kab6t91"},{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"},{"id":"https://openalex.org/F4320320705","display_name":"Singapore International Foundation","ror":"https://ror.org/02jehgd93"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"},{"id":"https://openalex.org/F4320333848","display_name":"Forsknings- og Innovationsstyrelsen","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3094346571.pdf","grobid_xml":"https://content.openalex.org/works/W3094346571.grobid-xml"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W89231944","https://openalex.org/W1522301498","https://openalex.org/W1532956126","https://openalex.org/W1819710477","https://openalex.org/W1901129140","https://openalex.org/W2017416504","https://openalex.org/W2096354165","https://openalex.org/W2096482524","https://openalex.org/W2104298926","https://openalex.org/W2123045220","https://openalex.org/W2125722442","https://openalex.org/W2129192849","https://openalex.org/W2144414181","https://openalex.org/W2147761286","https://openalex.org/W2152937398","https://openalex.org/W2198584637","https://openalex.org/W2295460171","https://openalex.org/W2407685581","https://openalex.org/W2559688696","https://openalex.org/W2566755135","https://openalex.org/W2593836007","https://openalex.org/W2618946976","https://openalex.org/W2747464893","https://openalex.org/W2771644120","https://openalex.org/W2891575704","https://openalex.org/W2898148140","https://openalex.org/W2902303903","https://openalex.org/W2938774173","https://openalex.org/W2962968839","https://openalex.org/W2963045359","https://openalex.org/W2963551352","https://openalex.org/W2964706117","https://openalex.org/W2990338180","https://openalex.org/W2990559773","https://openalex.org/W2991118122","https://openalex.org/W2991439390","https://openalex.org/W2998160449","https://openalex.org/W3003673875","https://openalex.org/W3015247127","https://openalex.org/W3046715528","https://openalex.org/W3091273176","https://openalex.org/W3205235423","https://openalex.org/W6603626069","https://openalex.org/W6631190155","https://openalex.org/W6639824700","https://openalex.org/W6677995690","https://openalex.org/W6697000175","https://openalex.org/W6714030504","https://openalex.org/W6730401039","https://openalex.org/W6733936739","https://openalex.org/W6738884980","https://openalex.org/W6743059557","https://openalex.org/W6745851314","https://openalex.org/W6755182157","https://openalex.org/W6756902375","https://openalex.org/W6763354517","https://openalex.org/W6764408361","https://openalex.org/W6770106051","https://openalex.org/W6770469905","https://openalex.org/W6772982332","https://openalex.org/W6773252663"],"related_works":["https://openalex.org/W3160787318","https://openalex.org/W3040811551","https://openalex.org/W3025875702","https://openalex.org/W3132851202","https://openalex.org/W2900132857","https://openalex.org/W3169635929","https://openalex.org/W3158762648","https://openalex.org/W2941619235","https://openalex.org/W3159430391","https://openalex.org/W2985820376","https://openalex.org/W2906042495","https://openalex.org/W2796010067","https://openalex.org/W3015641245","https://openalex.org/W2963103134","https://openalex.org/W3124061379","https://openalex.org/W2394675549","https://openalex.org/W2606052883","https://openalex.org/W3041561163","https://openalex.org/W3204915839","https://openalex.org/W75086142"],"abstract_inverted_index":{"Most":[0],"of":[1,110,230],"the":[2,11,41,48,53,61,96,113,117,127,139,143,148,173,179,186,190,198,205,221,228,231,234,243],"state-of-the-art":[3,90,206],"automatic":[4],"music":[5],"transcription":[6,13,91,181,253],"(AMT)":[7],"models":[8],"break":[9],"down":[10],"main":[12],"task":[14],"into":[15,119,130],"sub-tasks":[16],"such":[17],"as":[18,40],"onset":[19,28],"prediction":[20,23],"and":[21,24,29,38,69,122,142,166,245],"offset":[22,30],"train":[25,44,157],"them":[26],"with":[27,47,65,227],"labels.":[31],"These":[32],"predictions":[33],"are":[34],"then":[35],"concatenated":[36],"together":[37],"used":[39],"input":[42],"to":[43,51,58,146,151,185,201,239],"another":[45],"model":[46,74,108,159,188,235],"pitch":[49,62],"labels":[50,63],"obtain":[52],"final":[54],"transcription.":[55],"We":[56,156],"attempt":[57],"use":[59],"only":[60,153],"(together":[64],"spectrogram":[66,99,118,141,145],"reconstruction":[67,100,134,174,191,232],"loss)":[68],"explore":[70,95],"how":[71],"far":[72],"this":[73,82],"can":[75,176,195],"go":[76],"without":[77,189],"introducing":[78],"supervised":[79],"sub-tasks.":[80],"In":[81],"paper,":[83],"we":[84,94],"do":[85],"not":[86],"aim":[87],"at":[88],"achieving":[89],"accuracy,":[92],"instead,":[93],"effect":[97],"that":[98,171,226],"has":[101],"on":[102,154,160],"our":[103,158,213],"AMT":[104],"model.":[105],"Our":[106,168],"proposed":[107],"consists":[109],"two":[111],"U-nets:":[112],"first":[114],"U-net":[115,125,150,214],"transcribes":[116],"a":[120,123,131,250],"posteriorgram,":[121],"second":[124,149],"transforms":[126],"posteriorgram":[128],"back":[129],"spectrogram.":[132],"A":[133],"loss":[135,175],"is":[136,236],"applied":[137],"between":[138],"original":[140],"reconstructed":[144],"constrain":[147],"focus":[152],"reconstruction.":[155],"three":[161],"different":[162],"datasets:":[163],"MAPS,":[164],"MAESTRO,":[165],"MusicNet.":[167],"experiments":[169],"show":[170],"adding":[172],"generally":[177],"improve":[178],"note-level":[180,252],"accuracy":[182],"when":[183],"compared":[184],"same":[187],"part.":[192],"Moreover,":[193],"it":[194],"also":[196],"boost":[197],"frame-level":[199],"precision":[200],"be":[202],"higher":[203,251],"than":[204],"models.":[207],"The":[208],"feature":[209],"maps":[210],"learned":[211],"by":[212],"contain":[215],"gridlike":[216],"structures":[217],"(not":[218],"present":[219],"in":[220,249],"baseline":[222],"model)":[223],"which":[224],"implies":[225],"presence":[229],"loss,":[233],"probably":[237],"trying":[238],"count":[240],"along":[241],"both":[242],"time":[244],"frequency":[246],"axis,":[247],"resulting":[248],"accuracy.":[254]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
