{"id":"https://openalex.org/W7108704846","doi":"https://doi.org/10.5281/zenodo.17811555","title":"PianoBind: A Multimodal Joint Embedding Model for Pop-piano Music","display_name":"PianoBind: A Multimodal Joint Embedding Model for Pop-piano Music","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7108704846","doi":"https://doi.org/10.5281/zenodo.17811555"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17811555","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811555","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17811555","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hayeon Bang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hayeon Bang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Eunjin Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eunjin Choi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Seungheon Doh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seungheon Doh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Juhan Nam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juhan Nam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62728686,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.00930000003427267,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.0012000000569969416,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/piano","display_name":"Piano","score":0.890999972820282},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.8062000274658203},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6721000075340271},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6323999762535095},{"id":"https://openalex.org/keywords/homogeneous","display_name":"Homogeneous","score":0.5781999826431274},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.5097000002861023},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4952999949455261}],"concepts":[{"id":"https://openalex.org/C124086623","wikidata":"https://www.wikidata.org/wiki/Q5994","display_name":"Piano","level":2,"score":0.890999972820282},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.8062000274658203},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6721000075340271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6574000120162964},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6323999762535095},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.5781999826431274},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.5097000002861023},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4952999949455261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4909000098705292},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46160000562667847},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40720000863075256},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39730000495910645},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17811555","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811555","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.17811555","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811555","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7127620577812195,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Solo":[0],"piano":[1,42,60,109,126,137,161],"music,":[2,61,127],"despite":[3],"being":[4],"a":[5,76,93],"single-instrument":[6],"medium,":[7],"possesses":[8],"significant":[9],"expressive":[10],"capabilities,":[11],"conveying":[12],"rich":[13],"semantic":[14,37,101],"information":[15],"across":[16],"genres,":[17],"moods,":[18],"and":[19,66,89,106,135],"styles.":[20],"However,":[21],"current":[22],"general-purpose":[23,141],"music":[24,142],"representation":[25,47,155],"models,":[26],"predominantly":[27],"trained":[28],"on":[29,133],"large-scale":[30],"datasets,":[31],"often":[32],"struggle":[33],"to":[34,53,140],"captures":[35],"subtle":[36,123],"distinctions":[38,102],"within":[39,92],"homogeneous":[40,108,158],"solo":[41],"music.":[43,162],"Furthermore,":[44],"existing":[45],"piano-specific":[46,77],"models":[48],"are":[49],"typically":[50],"unimodal,":[51],"failing":[52],"capture":[54,122],"the":[55],"inherently":[56],"multimodal":[57,78,118,154],"nature":[58],"of":[59,125],"expressed":[62],"through":[63],"audio,":[64],"symbolic,":[65],"textual":[67],"modalities.":[68],"To":[69],"address":[70],"these":[71],"limitations,":[72],"we":[73],"propose":[74],"PianoBind,":[75],"joint":[79,94,143],"embedding":[80,95,144],"model.":[81],"We":[82],"systematically":[83],"investigate":[84],"strategies":[85],"for":[86,98,153],"multi-source":[87],"training":[88],"modality":[90],"utilization":[91],"framework":[96],"optimized":[97],"capturing":[99],"fine-grained":[100],"in":[103],"(1)":[104],"small-scale":[105],"(2)":[107],"datasets.":[110],"Our":[111],"experimental":[112],"results":[113],"demonstrate":[114],"that":[115,120],"PianoBind":[116],"learns":[117],"representations":[119],"effectively":[121],"nuances":[124],"achieving":[128],"superior":[129],"text-to-music":[130],"retrieval":[131],"performance":[132],"in-domain":[134],"out-of-domain":[136],"datasets":[138,159],"compared":[139],"models.":[145],"Moreover,":[146],"our":[147],"design":[148],"choices":[149],"offer":[150],"reusable":[151],"insights":[152],"learning":[156],"with":[157],"beyond":[160]},"counts_by_year":[],"updated_date":"2025-12-05T23:25:22.460635","created_date":"2025-12-05T00:00:00"}
