{"id":"https://openalex.org/W3165571623","doi":"https://doi.org/10.23919/eusipco54536.2021.9616301","title":"Exploiting Temporal Dependencies for Cross-modal Music Piece Identification","display_name":"Exploiting Temporal Dependencies for Cross-modal Music Piece Identification","publication_year":2021,"publication_date":"2021-08-23","ids":{"openalex":"https://openalex.org/W3165571623","doi":"https://doi.org/10.23919/eusipco54536.2021.9616301","mag":"3165571623"},"language":"en","primary_location":{"id":"doi:10.23919/eusipco54536.2021.9616301","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616301","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046035965","display_name":"Luis Carvalho","orcid":"https://orcid.org/0000-0002-1344-3463"},"institutions":[{"id":"https://openalex.org/I121883995","display_name":"Johannes Kepler University of Linz","ror":"https://ror.org/052r2xn60","country_code":"AT","type":"education","lineage":["https://openalex.org/I121883995"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Luis Carvalho","raw_affiliation_strings":["Institute of Computational Perception, Johannes Kepler University, Linz, Austria"],"affiliations":[{"raw_affiliation_string":"Institute of Computational Perception, Johannes Kepler University, Linz, Austria","institution_ids":["https://openalex.org/I121883995"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003768123","display_name":"Gerhard Widmer","orcid":"https://orcid.org/0000-0003-3531-1282"},"institutions":[{"id":"https://openalex.org/I121883995","display_name":"Johannes Kepler University of Linz","ror":"https://ror.org/052r2xn60","country_code":"AT","type":"education","lineage":["https://openalex.org/I121883995"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Gerhard Widmer","raw_affiliation_strings":["Institute of Computational Perception, Johannes Kepler University, Linz, Austria","LIT Artificial Intelligence Lab, Johannes Kepler University, Linz, Austria"],"affiliations":[{"raw_affiliation_string":"Institute of Computational Perception, Johannes Kepler University, Linz, Austria","institution_ids":["https://openalex.org/I121883995"]},{"raw_affiliation_string":"LIT Artificial Intelligence Lab, Johannes Kepler University, Linz, Austria","institution_ids":["https://openalex.org/I121883995"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5046035965"],"corresponding_institution_ids":["https://openalex.org/I121883995"],"apc_list":null,"apc_paid":null,"fwci":0.172,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.32034923,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"386","last_page":"390"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8201172351837158},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6968886256217957},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6502367258071899},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.616679310798645},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5338844656944275},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.5110369920730591},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.45293524861335754},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.43897849321365356},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42286187410354614},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3767702579498291},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35037145018577576},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.19366329908370972},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.15584373474121094},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.07972580194473267}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8201172351837158},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6968886256217957},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6502367258071899},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.616679310798645},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5338844656944275},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.5110369920730591},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.45293524861335754},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.43897849321365356},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42286187410354614},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3767702579498291},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35037145018577576},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.19366329908370972},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.15584373474121094},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.07972580194473267},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/eusipco54536.2021.9616301","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616301","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5019278271","display_name":null,"funder_award_id":"765068","funder_id":"https://openalex.org/F4320338438","funder_display_name":"HORIZON EUROPE Marie Sklodowska-Curie Actions"}],"funders":[{"id":"https://openalex.org/F4320338438","display_name":"HORIZON EUROPE Marie Sklodowska-Curie Actions","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W123154089","https://openalex.org/W189906980","https://openalex.org/W1506762753","https://openalex.org/W1527575280","https://openalex.org/W1567216536","https://openalex.org/W2406388223","https://openalex.org/W2407410673","https://openalex.org/W2478051194","https://openalex.org/W2620341577","https://openalex.org/W2739755484","https://openalex.org/W2890559714","https://openalex.org/W2953627314","https://openalex.org/W2990825719","https://openalex.org/W3015879946","https://openalex.org/W3021908862","https://openalex.org/W3046038389","https://openalex.org/W3102718214","https://openalex.org/W3186755767","https://openalex.org/W6604987197","https://openalex.org/W6605019438","https://openalex.org/W6607769105","https://openalex.org/W6630444114","https://openalex.org/W6631516269","https://openalex.org/W6713718600","https://openalex.org/W6764724119","https://openalex.org/W6781639648"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W2389214306","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W4387830220","https://openalex.org/W4381714195"],"abstract_inverted_index":{"This":[0],"paper":[1],"addresses":[2],"the":[3,13,141,153,159,170],"problem":[4],"of":[5,82,85,106,118,130,161,172],"cross-modal":[6,46,70],"musical":[7],"piece":[8,122],"identification":[9],"and":[10,24,31,56,65,113,123,175],"retrieval:":[11],"finding":[12],"appropriate":[14],"recording(s)":[15],"from":[16,109],"a":[17,20,45,50,61,101],"database":[18],"given":[19],"sheet":[21,33,57,110],"music":[22,34,111],"query,":[23],"vice":[25],"versa,":[26],"working":[27],"directly":[28],"with":[29,49],"audio":[30,55,114],"scanned":[32],"images.":[35],"The":[36],"fundamental":[37],"approach":[38],"to":[39,43,152,179],"this":[40,75,78,88,96],"[1]":[41],"is":[42,80],"learn":[44],"embedding":[47,154],"space":[48],"suitable":[51,182],"similarity":[52],"structure":[53],"for":[54,183],"image":[58],"snippets,":[59],"using":[60],"deep":[62],"neural":[63],"network,":[64],"identifying":[66],"candidate":[67],"pieces":[68],"by":[69,147],"near":[71],"neighbour":[72],"search":[73],"in":[74,164],"space.":[76],"However,":[77],"method":[79,174],"oblivious":[81],"temporal":[83],"aspects":[84],"music.":[86,165],"In":[87],"paper,":[89],"we":[90,99,138,168],"introduce":[91],"two":[92],"strategies":[93],"that":[94,103,140,157],"address":[95],"shortcoming.":[97],"First,":[98],"present":[100],"strategy":[102],"aligns":[104],"sequences":[105],"embeddings":[107],"learned":[108],"scans":[112],"snippets.":[115],"A":[116],"series":[117],"experiments":[119],"on":[120,126],"whole":[121],"fragment-level":[124],"retrieval":[125,142],"24":[127],"hours":[128],"worth":[129],"classical":[131],"piano":[132],"recordings":[133],"demonstrates":[134],"significant":[135],"improvement.":[136],"Second,":[137],"show":[139],"can":[143],"be":[144],"further":[145],"improved":[146],"introducing":[148],"an":[149],"attention":[150],"mechanism":[151],"learning":[155],"model":[156],"reduces":[158],"effects":[160],"tempo":[162],"variations":[163],"To":[166],"conclude,":[167],"assess":[169],"scalability":[171],"our":[173],"discuss":[176],"potential":[177],"measures":[178],"make":[180],"it":[181],"truly":[184],"large-scale":[185],"applications.":[186]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
