{"id":"https://openalex.org/W3034680922","doi":"https://doi.org/10.1109/icme46284.2020.9102975","title":"Learn A Robust Representation For Cover Song Identification Via Aggregating Local And Global Music Temporal Context","display_name":"Learn A Robust Representation For Cover Song Identification Via Aggregating Local And Global Music Temporal Context","publication_year":2020,"publication_date":"2020-06-09","ids":{"openalex":"https://openalex.org/W3034680922","doi":"https://doi.org/10.1109/icme46284.2020.9102975","mag":"3034680922"},"language":"en","primary_location":{"id":"doi:10.1109/icme46284.2020.9102975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062777592","display_name":"Chaoya Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Chaoya Jiang","raw_affiliation_strings":["Wangxuan Institute of Computer TechnologyPeking University"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer TechnologyPeking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100885747","display_name":"Deshun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Deshun Yang","raw_affiliation_strings":["Wangxuan Institute of Computer TechnologyPeking University"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer TechnologyPeking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017207620","display_name":"Xiaoou Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Xiaoou Chen","raw_affiliation_strings":["Wangxuan Institute of Computer TechnologyPeking University"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer TechnologyPeking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062777592"],"corresponding_institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.3042,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.53085696,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"7","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7634234428405762},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7492209076881409},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6319484114646912},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5950279831886292},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.572905957698822},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5333274602890015},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.5108606219291687},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5054512023925781},{"id":"https://openalex.org/keywords/pyramid","display_name":"Pyramid (geometry)","score":0.5009989738464355},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48473426699638367},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.483779639005661},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4640817642211914},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.44745323061943054},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.43985098600387573},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.43786364793777466},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1065957248210907},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.08922114968299866},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.0830184817314148}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7634234428405762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7492209076881409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6319484114646912},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5950279831886292},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.572905957698822},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5333274602890015},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.5108606219291687},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5054512023925781},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.5009989738464355},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48473426699638367},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.483779639005661},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4640817642211914},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.44745323061943054},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43985098600387573},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.43786364793777466},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1065957248210907},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.08922114968299866},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0830184817314148},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icme46284.2020.9102975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},{"id":"mag:3191209702","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002285099917040","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7400000095367432,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W166844666","https://openalex.org/W216397362","https://openalex.org/W1486009449","https://openalex.org/W1493535305","https://openalex.org/W1505667669","https://openalex.org/W2076608692","https://openalex.org/W2101642281","https://openalex.org/W2111007352","https://openalex.org/W2131739315","https://openalex.org/W2137319814","https://openalex.org/W2154473523","https://openalex.org/W2191779130","https://openalex.org/W2265256355","https://openalex.org/W2295756080","https://openalex.org/W2408482937","https://openalex.org/W2538860156","https://openalex.org/W2587288237","https://openalex.org/W2605092535","https://openalex.org/W2611198916","https://openalex.org/W2727839354","https://openalex.org/W2740932163","https://openalex.org/W2772072643","https://openalex.org/W2792764867","https://openalex.org/W2896276533","https://openalex.org/W2903337088","https://openalex.org/W2963840672","https://openalex.org/W2965178495","https://openalex.org/W2986451890","https://openalex.org/W4206856897","https://openalex.org/W6608682707","https://openalex.org/W6628883615","https://openalex.org/W6630030631","https://openalex.org/W6676368125","https://openalex.org/W6696085341","https://openalex.org/W6714078465","https://openalex.org/W6729231832","https://openalex.org/W6736035150","https://openalex.org/W6746244618","https://openalex.org/W6749825310","https://openalex.org/W6756466442","https://openalex.org/W6807223892","https://openalex.org/W6893151505"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W2797752778","https://openalex.org/W3119773509","https://openalex.org/W3208297503","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353","https://openalex.org/W2242773987"],"abstract_inverted_index":{"Recently,":[0],"deep":[1,59,152],"learning":[2],"models":[3],"have":[4],"been":[5],"proposed":[6],"for":[7,18,33,61],"cover":[8,62],"song":[9,63],"identification":[10,64],"and":[11,78,123,133,150],"designed":[12],"to":[13,57,72,81,92,110,114,146],"learn":[14,58],"fixed-length":[15],"feature":[16],"vectors":[17],"music":[19,88],"recordings.":[20,89],"However,":[21],"the":[22,25,35,74,83,93,98,112,120,124,162],"aspect":[23],"of":[24,28,103],"temporal":[26,76,85],"progression":[27],"music,":[29],"which":[30,96],"is":[31,41,70],"important":[32],"measuring":[34],"melody":[36],"similarity":[37,99],"between":[38,100],"two":[39],"recordings,":[40,104],"not":[42],"well":[43],"exploited":[44],"in":[45,87],"those":[46],"models.":[47],"In":[48,90],"this":[49],"paper,":[50],"we":[51,105],"propose":[52],"a":[53,101,107],"new":[54],"Siamese":[55],"architecture":[56],"representations":[60,138],"where":[65],"Dilated":[66],"Temporal":[67,79],"Pyramid":[68],"Convolution":[69],"used":[71],"exploit":[73,82],"local":[75],"context":[77,86],"Self-Attention":[80],"global":[84],"addition":[91],"traditional":[94],"block":[95,109],"calculates":[97],"pair":[102],"add":[106],"classification":[108,125],"classify":[111],"recordings":[113],"their":[115],"respective":[116],"cliques.":[117],"By":[118],"combining":[119],"regression":[121],"loss":[122],"loss,":[126],"our":[127,141,158],"model":[128,142],"can":[129],"leam":[130],"more":[131],"robust":[132],"discriminative":[134],"latent":[135],"representations.":[136],"The":[137],"extracted":[139],"by":[140],"show":[143,156],"substantial":[144],"superiority":[145],"existing":[147],"hand-crafted":[148],"features":[149],"learned":[151],"features.":[153],"Experimental":[154],"results":[155],"that":[157],"approach":[159],"far":[160],"outperforms":[161],"state-of":[163],"the-art":[164],"methods":[165],"on":[166],"several":[167],"public":[168],"datasets.":[169]},"counts_by_year":[{"year":2024,"cited_by_count":20},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
