{"id":"https://openalex.org/W3159920639","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533662","title":"Cross-Modal Music-Video Recommendation: A Study of Design Choices","display_name":"Cross-Modal Music-Video Recommendation: A Study of Design Choices","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3159920639","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533662","mag":"3159920639"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9533662","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533662","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.14799","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059705726","display_name":"Laure Pr\u00e9tet","orcid":"https://orcid.org/0000-0001-5224-315X"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I4210165912","display_name":"Laboratoire Traitement et Communication de l\u2019Information","ror":"https://ror.org/057er4c39","country_code":"FR","type":"facility","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4210165912"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Laure Pretet","raw_affiliation_strings":["LTCI, T\u00e9l\u00e9com Paris Bridge.audio, Paris, France","LTCI, T\u00e9l\u00e9com Paris Bridge.audio,Paris,France"],"affiliations":[{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris Bridge.audio, Paris, France","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I12356871"]},{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris Bridge.audio,Paris,France","institution_ids":["https://openalex.org/I12356871","https://openalex.org/I4210165912"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055423112","display_name":"Ga\u00ebl Richard","orcid":"https://orcid.org/0000-0002-4960-0010"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I4210165912","display_name":"Laboratoire Traitement et Communication de l\u2019Information","ror":"https://ror.org/057er4c39","country_code":"FR","type":"facility","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4210165912"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Gael Richard","raw_affiliation_strings":["LTCI, T\u00e9l\u00e9com Paris Institut Polytechnique de Paris, France","LTCI, T\u00e9l\u00e9com Paris, Institut Polytechnique de Paris (France)"],"affiliations":[{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris Institut Polytechnique de Paris, France","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I12356871"]},{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris, Institut Polytechnique de Paris (France)","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I12356871"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063097936","display_name":"Geoffroy Peeters","orcid":"https://orcid.org/0000-0001-5255-3019"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I4210165912","display_name":"Laboratoire Traitement et Communication de l\u2019Information","ror":"https://ror.org/057er4c39","country_code":"FR","type":"facility","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4210165912"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Geoffroy Peeters","raw_affiliation_strings":["LTCI, T\u00e9l\u00e9com Paris Institut Polytechnique de Paris, France","LTCI, T\u00e9l\u00e9com Paris, Institut Polytechnique de Paris (France)"],"affiliations":[{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris Institut Polytechnique de Paris, France","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I12356871"]},{"raw_affiliation_string":"LTCI, T\u00e9l\u00e9com Paris, Institut Polytechnique de Paris (France)","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I12356871"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059705726"],"corresponding_institution_ids":["https://openalex.org/I12356871","https://openalex.org/I4210165912"],"apc_list":null,"apc_paid":null,"fwci":0.1539,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.41440172,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8121200799942017},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6263373494148254},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5248432159423828},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5238480567932129},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.4622592329978943},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45303380489349365},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.43385544419288635},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4015069901943207},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39515334367752075}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8121200799942017},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6263373494148254},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5248432159423828},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5238480567932129},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.4622592329978943},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45303380489349365},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.43385544419288635},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4015069901943207},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39515334367752075},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9533662","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533662","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.14799","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.14799","pdf_url":"https://arxiv.org/pdf/2104.14799","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3159920639","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2104.14799","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:HAL:hal-03208323v1","is_oa":false,"landing_page_url":"https://telecom-paris.hal.science/hal-03208323","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Special Session of the International Joint Conference on Neural Networks (IJCNN 2021), Jul 2021, Shenzhen, China","raw_type":"Conference papers"},{"id":"pmh:oai:oskar-bordeaux.fr:20.500.12278/40058","is_oa":false,"landing_page_url":"https://oskar-bordeaux.fr/handle/20.500.12278/40058","pdf_url":null,"source":{"id":"https://openalex.org/S4306402569","display_name":"Oskar-Bordeaux (Universite de Bordeaux)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Communication dans un congr\u00e8s avec actes"},{"id":"doi:10.48550/arxiv.2104.14799","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.14799","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.14799","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.14799","pdf_url":"https://arxiv.org/pdf/2104.14799","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3159920639.pdf","grobid_xml":"https://content.openalex.org/works/W3159920639.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W1587431844","https://openalex.org/W1923404803","https://openalex.org/W1975517671","https://openalex.org/W2106053110","https://openalex.org/W2116777898","https://openalex.org/W2147776527","https://openalex.org/W2161381512","https://openalex.org/W2184188583","https://openalex.org/W2191779130","https://openalex.org/W2209982072","https://openalex.org/W2476034201","https://openalex.org/W2511428026","https://openalex.org/W2524365899","https://openalex.org/W2535046761","https://openalex.org/W2593116425","https://openalex.org/W2600087639","https://openalex.org/W2619697695","https://openalex.org/W2623327532","https://openalex.org/W2892865870","https://openalex.org/W2906259846","https://openalex.org/W2908965201","https://openalex.org/W2939574508","https://openalex.org/W2962756039","https://openalex.org/W2962960500","https://openalex.org/W2962970472","https://openalex.org/W2963115079","https://openalex.org/W2964109005","https://openalex.org/W2990387939","https://openalex.org/W3000400453","https://openalex.org/W3003425996","https://openalex.org/W3015470225","https://openalex.org/W6675751002","https://openalex.org/W6681741891","https://openalex.org/W6686207219","https://openalex.org/W6721087566","https://openalex.org/W6729831399","https://openalex.org/W6732370471","https://openalex.org/W6735252623","https://openalex.org/W6738806211","https://openalex.org/W6768465061","https://openalex.org/W6771024394","https://openalex.org/W6772232246"],"related_works":["https://openalex.org/W3198668032","https://openalex.org/W2968808972","https://openalex.org/W2908965201","https://openalex.org/W3179432212","https://openalex.org/W3155372489","https://openalex.org/W2783457476","https://openalex.org/W3197980617","https://openalex.org/W3207922251","https://openalex.org/W3047425522","https://openalex.org/W3202295776","https://openalex.org/W3048065599","https://openalex.org/W2948242301","https://openalex.org/W3034875620","https://openalex.org/W3045687178","https://openalex.org/W3034742263","https://openalex.org/W3044087851","https://openalex.org/W3143970329","https://openalex.org/W2982672255","https://openalex.org/W2776528780","https://openalex.org/W3203164940"],"abstract_inverted_index":{"In":[0,67],"this":[1,68],"work,":[2,69],"we":[3,53,70],"study":[4],"music/video":[5],"cross-modal":[6,130],"recommendation,":[7],"i.e.":[8],"recommending":[9],"a":[10,14,22,29,38,45,73,89],"music":[11],"track":[12],"for":[13],"video":[15,58],"or":[16,117],"vice":[17],"versa.":[18],"We":[19,35,97,123,149],"rely":[20,36],"on":[21,37,83],"self-supervised":[23,39,147],"learning":[24,40,104],"paradigm":[25,41],"to":[26,42,139],"learn":[27,43,55],"from":[28,44],"large":[30,46],"amount":[31,47],"of":[32,48,91,128],"unlabelled":[33,49],"data.":[34,50],"More":[51],"precisely,":[52],"jointly":[54],"audio":[56,85,102,108],"and":[57],"embeddings":[59,109],"by":[60,88,111,118],"using":[61,101,154],"their":[62],"co-occurrence":[63],"in":[64,135,146],"music-video":[65],"clips.":[66],"build":[71],"upon":[72],"recent":[74],"video-music":[75],"retrieval":[76],"system":[77],"(the":[78],"VM-NET),":[79],"which":[80],"originally":[81,133],"relies":[82],"an":[84],"representation":[86,103],"obtained":[87],"set":[90],"statistics":[92],"computed":[93],"over":[94],"handcrafted":[95],"features.":[96],"demonstrate":[98],"here":[99],"that":[100],"such":[105],"as":[106],"the":[107,112,126,129,136,140,155],"provided":[110],"pre-trained":[113],"MuSimNet,":[114],"OpenL3,":[115],"MusicCNN":[116],"AudioSet,":[119],"largely":[120],"improves":[121],"recommendations.":[122],"also":[124],"validate":[125],"use":[127],"triplet":[131],"loss":[132,143],"proposed":[134],"VM-NET":[137],"compared":[138],"binary":[141],"cross-entropy":[142],"commonly":[144],"used":[145],"learning.":[148],"perform":[150],"all":[151],"our":[152],"experiments":[153],"Music":[156],"Video":[157],"Dataset":[158],"(MVD).":[159]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
