{"id":"https://openalex.org/W4205501836","doi":"https://doi.org/10.1109/lsp.2021.3135192","title":"A Convolutional-Attentional Neural Framework for Structure-Aware Performance-Score Synchronization","display_name":"A Convolutional-Attentional Neural Framework for Structure-Aware Performance-Score Synchronization","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W4205501836","doi":"https://doi.org/10.1109/lsp.2021.3135192"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2021.3135192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3135192","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034426967","display_name":"Ruchit Agrawal","orcid":"https://orcid.org/0000-0002-3609-9589"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Ruchit Agrawal","raw_affiliation_strings":["Centre for Digital Music, Queen Mary University of London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Digital Music, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045832504","display_name":"Daniel Wolff","orcid":"https://orcid.org/0000-0003-4550-1442"},"institutions":[{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Daniel Wolff","raw_affiliation_strings":["Institute for Research and Coordination in Acoustics/Music, Paris, France"],"affiliations":[{"raw_affiliation_string":"Institute for Research and Coordination in Acoustics/Music, Paris, France","institution_ids":["https://openalex.org/I35345632"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077484271","display_name":"Simon Dixon","orcid":"https://orcid.org/0000-0002-6098-481X"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon Dixon","raw_affiliation_strings":["Centre for Digital Music, Queen Mary University of London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Digital Music, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5034426967"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":0.6173,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.68714486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"29","issue":null,"first_page":"344","last_page":"348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.779400110244751},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6613295078277588},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6342805624008179},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5183404684066772},{"id":"https://openalex.org/keywords/f1-score","display_name":"F1 score","score":0.4999251365661621},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4681326150894165},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4573274850845337},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.43967896699905396},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42308509349823},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4199952483177185},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38596487045288086},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.348379909992218},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.18008482456207275}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.779400110244751},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6613295078277588},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6342805624008179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5183404684066772},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.4999251365661621},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4681326150894165},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4573274850845337},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.43967896699905396},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42308509349823},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4199952483177185},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38596487045288086},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.348379909992218},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.18008482456207275},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2021.3135192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3135192","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/77428","is_oa":false,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/77428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1974542962","display_name":null,"funder_award_id":"Sk\u0142odowska","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G2689612763","display_name":null,"funder_award_id":"Marie","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4924294516","display_name":null,"funder_award_id":"765068","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G50887856","display_name":null,"funder_award_id":"765068","funder_id":"https://openalex.org/F4320338337","funder_display_name":"H2020 Marie Sk\u0142odowska-Curie Actions"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320338337","display_name":"H2020 Marie Sk\u0142odowska-Curie Actions","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W40088496","https://openalex.org/W67806156","https://openalex.org/W1510799621","https://openalex.org/W1849277567","https://openalex.org/W1875892135","https://openalex.org/W2000842688","https://openalex.org/W2024511855","https://openalex.org/W2150696241","https://openalex.org/W2155515618","https://openalex.org/W2160499613","https://openalex.org/W2191779130","https://openalex.org/W2476446632","https://openalex.org/W2478051194","https://openalex.org/W2535388113","https://openalex.org/W2584761930","https://openalex.org/W2623638694","https://openalex.org/W2797057408","https://openalex.org/W2890559714","https://openalex.org/W2920582597","https://openalex.org/W3015963521","https://openalex.org/W3117303186","https://openalex.org/W3162873781","https://openalex.org/W6600858156","https://openalex.org/W6603836127","https://openalex.org/W6605019438","https://openalex.org/W6607608197","https://openalex.org/W6608238415","https://openalex.org/W6682511423","https://openalex.org/W6685805525","https://openalex.org/W6713718600","https://openalex.org/W6734312481","https://openalex.org/W6739901393","https://openalex.org/W6742368143","https://openalex.org/W6750469568","https://openalex.org/W6753035467","https://openalex.org/W6763367864","https://openalex.org/W6768985362","https://openalex.org/W6770058728","https://openalex.org/W6781255595","https://openalex.org/W6781560242","https://openalex.org/W6784591980"],"related_works":["https://openalex.org/W2289868279","https://openalex.org/W4315836293","https://openalex.org/W2970176078","https://openalex.org/W4231351862","https://openalex.org/W4212794605","https://openalex.org/W4243888788","https://openalex.org/W2157165686","https://openalex.org/W1975359510","https://openalex.org/W3004352674","https://openalex.org/W2088690926"],"abstract_inverted_index":{"Performance-score":[0],"synchronization":[1,28,111],"is":[2,127,140],"an":[3,12,16],"integral":[4],"task":[5],"in":[6],"signal":[7],"processing,":[8],"which":[9,139],"entails":[10],"generating":[11],"accurate":[13],"mapping":[14],"between":[15,133],"audio":[17],"recording":[18],"of":[19,92,116,144],"a":[20,51,61,66,114,141],"performance":[21,135],"and":[22,34,37,47,79,98,122,136],"the":[23,77,90,134],"corresponding":[24],"musical":[25],"score.":[26],"Traditional":[27],"methods":[29,112],"compute":[30],"alignment":[31,81,102,146],"using":[32],"knowledge-driven":[33],"stochastic":[35],"approaches,":[36],"are":[38],"typically":[39],"unable":[40],"to":[41,44,84,130],"generalize":[42],"well":[43],"different":[45,85],"domains":[46],"modalities.":[48,87],"We":[49,59,73,88,104],"present":[50],"novel":[52],"data-driven":[53],"method":[54,94,126],"for":[55,76,113],"structure-aware":[56],"performance-score":[57],"synchronization.":[58],"propose":[60],"convolutional-attentional":[62],"architecture":[63],"trained":[64],"with":[65,100],"custom":[67],"loss":[68],"based":[69],"on":[70],"time-series":[71],"divergence.":[72],"conduct":[74],"experiments":[75],"audio-to-MIDI":[78],"audio-to-image":[80],"tasks":[82],"pertained":[83],"score":[86,120,137],"validate":[89],"effectiveness":[91],"our":[93,107],"via":[95],"ablation":[96],"studies":[97],"comparisons":[99],"state-of-the-art":[101],"approaches.":[103,147],"demonstrate":[105],"that":[106],"approach":[108],"outperforms":[109],"previous":[110],"variety":[115],"test":[117],"settings":[118],"across":[119],"modalities":[121],"acoustic":[123],"conditions.":[124],"Our":[125],"also":[128],"robust":[129],"structural":[131],"differences":[132],"sequences,":[138],"common":[142],"limitation":[143],"standard":[145]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
