{"id":"https://openalex.org/W3138202130","doi":"https://doi.org/10.1109/lsp.2021.3067635","title":"Self-Supervised Learning of Audio Representations From Permutations With Differentiable Ranking","display_name":"Self-Supervised Learning of Audio Representations From Permutations With Differentiable Ranking","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3138202130","doi":"https://doi.org/10.1109/lsp.2021.3067635","mag":"3138202130"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2021.3067635","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3067635","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2103.09879","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Andrew N. Carr","orcid":"https://orcid.org/0000-0001-8294-1522"},"institutions":[{"id":"https://openalex.org/I100005738","display_name":"Brigham Young University","ror":"https://ror.org/047rhhm47","country_code":"US","type":"education","lineage":["https://openalex.org/I100005738"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew N. Carr","raw_affiliation_strings":["Computer Science Department, Brigham Young University, Provo, USA"],"raw_orcid":"https://orcid.org/0000-0001-8294-1522","affiliations":[{"raw_affiliation_string":"Computer Science Department, Brigham Young University, Provo, USA","institution_ids":["https://openalex.org/I100005738"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Quentin Berthet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Quentin Berthet","raw_affiliation_strings":["Google Brain Paris, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain Paris, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mathieu Blondel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mathieu Blondel","raw_affiliation_strings":["Google Brain Paris, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain Paris, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Olivier Teboul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivier Teboul","raw_affiliation_strings":["Google Brain Paris, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain Paris, Paris, France","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Neil Zeghidour","orcid":"https://orcid.org/0000-0001-6896-3987"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Neil Zeghidour","raw_affiliation_strings":["Google Brain Paris, Paris, France"],"raw_orcid":"https://orcid.org/0000-0001-6896-3987","affiliations":[{"raw_affiliation_string":"Google Brain Paris, Paris, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1619,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.87848966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"28","issue":null,"first_page":"708","last_page":"712"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.6301000118255615,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.6301000118255615,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.06840000301599503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.06800000369548798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.595300018787384},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.5803999900817871},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5329999923706055},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.519599974155426},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.47540000081062317},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.44209998846054077},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.3743000030517578},{"id":"https://openalex.org/keywords/random-permutation","display_name":"Random permutation","score":0.3682999908924103},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.36399999260902405},{"id":"https://openalex.org/keywords/musical-instrument","display_name":"Musical instrument","score":0.35510000586509705}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6294000148773193},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.595300018787384},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.5803999900817871},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5329999923706055},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5295000076293945},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.519599974155426},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.47540000081062317},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.3743000030517578},{"id":"https://openalex.org/C200985842","wikidata":"https://www.wikidata.org/wiki/Q3375503","display_name":"Random permutation","level":3,"score":0.3682999908924103},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.36399999260902405},{"id":"https://openalex.org/C2983311337","wikidata":"https://www.wikidata.org/wiki/Q34379","display_name":"Musical instrument","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34310001134872437},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3418000042438507},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.3098999857902527},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3001999855041504},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2985999882221222},{"id":"https://openalex.org/C2780880553","wikidata":"https://www.wikidata.org/wiki/Q213250","display_name":"Absolute pitch","level":3,"score":0.29750001430511475},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.28380000591278076},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.27639999985694885},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.27619999647140503},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2736000120639801},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.25619998574256897},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.2531000077724457}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2021.3067635","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3067635","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2103.09879","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.09879","pdf_url":"https://arxiv.org/pdf/2103.09879","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2103.09879","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.09879","pdf_url":"https://arxiv.org/pdf/2103.09879","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W343636949","https://openalex.org/W2321533354","https://openalex.org/W2599837529","https://openalex.org/W2606014079","https://openalex.org/W2618530766","https://openalex.org/W2883725317","https://openalex.org/W2963420272","https://openalex.org/W2964037671","https://openalex.org/W2982223350","https://openalex.org/W2992274074","https://openalex.org/W3011176162","https://openalex.org/W3015213852","https://openalex.org/W3016181583","https://openalex.org/W4302313152","https://openalex.org/W6631190155","https://openalex.org/W6713134421","https://openalex.org/W6736723571","https://openalex.org/W6746693533","https://openalex.org/W6747885307","https://openalex.org/W6747899497","https://openalex.org/W6755207826","https://openalex.org/W6757726483","https://openalex.org/W6757850993","https://openalex.org/W6770812723","https://openalex.org/W6771483944","https://openalex.org/W6774314701","https://openalex.org/W6774365965","https://openalex.org/W6774743317","https://openalex.org/W6779919476","https://openalex.org/W6780218876","https://openalex.org/W6791429434"],"related_works":[],"abstract_inverted_index":{"Self-supervised":[0],"pre-training":[1,27],"using":[2,66,110],"so-called":[3],"\u201cpretext\u201d":[4],"tasks":[5],"has":[6],"recently":[7],"shown":[8],"impressive":[9],"performance":[10],"across":[11],"a":[12,28,111,122],"wide":[13],"range":[14],"of":[15,34,37,57,87,105,143],"modalities.":[16],"In":[17,134],"this":[18],"work,":[19],"we":[20,52,116,136],"advance":[21],"self-supervised":[22],"learning":[23,97,127],"from":[24,98],"permutations,":[25],"by":[26,76,146],"model":[29],"to":[30,41],"reorder":[31],"shuffled":[32],"parts":[33],"the":[35,54,78,85,103,106,151],"spectrogram":[36,148],"an":[38,62,131],"audio":[39,128],"signal,":[40],"improve":[42,137],"downstream":[43],"classification":[44,139],"performance.":[45],"We":[46],"make":[47],"two":[48],"main":[49,55],"contributions.":[50],"First,":[51],"overcome":[53],"challenges":[56],"integrating":[58],"permutation":[59],"inversions":[60],"into":[61],"end-to-end":[63],"training":[64],"scheme,":[65],"recent":[67],"advances":[68],"in":[69,130,150],"differentiable":[70],"ranking.":[71],"This":[72],"was":[73],"heretofore":[74],"sidestepped":[75],"casting":[77],"reordering":[79,147],"task":[80,125],"as":[81],"classification,":[82],"fundamentally":[83],"reducing":[84],"space":[86],"permutations":[88,101,120],"that":[89,96,118],"can":[90],"be":[91],"exploited.":[92],"Our":[93],"experiments":[94],"validate":[95],"all":[99],"possible":[100],"improves":[102],"quality":[104],"pre-trained":[107],"representations":[108,129],"over":[109],"limited,":[112],"fixed":[113],"set.":[114],"Second,":[115],"show":[117],"inverting":[119],"is":[121],"meaningful":[123],"pretext":[124],"for":[126],"unsupervised":[132],"fashion.":[133],"particular,":[135],"instrument":[138],"and":[140],"pitch":[141],"estimation":[142],"musical":[144],"notes":[145],"patches":[149],"time-frequency":[152],"space.":[153]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2021-03-29T00:00:00"}
