{"id":"https://openalex.org/W7108658341","doi":"https://doi.org/10.5281/zenodo.17811429","title":"PianoVAM: A Multimodal Piano Performance Dataset","display_name":"PianoVAM: A Multimodal Piano Performance Dataset","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7108658341","doi":"https://doi.org/10.5281/zenodo.17811429"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17811429","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811429","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17811429","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yonghyun Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yonghyun Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Junhyung Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junhyung Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Joonhyung Bae","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joonhyung Bae","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kirak Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kirak Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Taegyun Kwon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taegyun Kwon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Alexander Lerch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexander Lerch","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Juhan Nam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juhan Nam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62573239,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.41819998621940613,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.41819998621940613,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.20170000195503235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14034","display_name":"Musicians\u2019 Health and Performance","score":0.09939999878406525,"subfield":{"id":"https://openalex.org/subfields/2742","display_name":"Rehabilitation"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/piano","display_name":"Piano","score":0.8312000036239624},{"id":"https://openalex.org/keywords/midi","display_name":"MIDI","score":0.7752000093460083},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.59170001745224},{"id":"https://openalex.org/keywords/amateur","display_name":"Amateur","score":0.5544000267982483},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.5519000291824341},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3246999979019165}],"concepts":[{"id":"https://openalex.org/C124086623","wikidata":"https://www.wikidata.org/wiki/Q5994","display_name":"Piano","level":2,"score":0.8312000036239624},{"id":"https://openalex.org/C8112396","wikidata":"https://www.wikidata.org/wiki/Q80535","display_name":"MIDI","level":2,"score":0.7752000093460083},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7462999820709229},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.59170001745224},{"id":"https://openalex.org/C2778044066","wikidata":"https://www.wikidata.org/wiki/Q455595","display_name":"Amateur","level":2,"score":0.5544000267982483},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.5519000291824341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5145000219345093},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4530999958515167},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31360000371932983},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2989000082015991},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17811429","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811429","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.17811429","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811429","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,44],"multimodal":[1],"nature":[2],"of":[3],"music":[4,18],"performance":[5,30,72],"has":[6],"driven":[7],"increasing":[8],"interest":[9],"in":[10,68],"data":[11,100],"beyond":[12],"the":[13,17,96,103,136],"audio":[14,53],"domain":[15],"within":[16],"information":[19],"retrieval":[20],"(MIR)":[21],"community.":[22],"This":[23],"paper":[24],"introduces":[25],"PianoVAM,":[26],"a":[27,49,82,89],"comprehensive":[28],"piano":[29,133],"dataset":[31,45,138],"that":[32],"includes":[33],"videos,":[34],"audio,":[35],"MIDI,":[36],"hand":[37,84,118],"landmarks,":[38],"fingering":[39,77,91,113],"labels,":[40],"and":[41,54,70,76,88,102,131,142],"rich":[42],"metadata.":[43],"was":[46],"recorded":[47],"using":[48,81,135],"Disklavier":[50],"piano,":[51],"capturing":[52],"MIDI":[55],"from":[56,121],"amateur":[57],"pianists":[58],"during":[59,99],"their":[60],"daily":[61],"practice":[62],"sessions,":[63],"alongside":[64],"synchronized":[65],"top-view":[66],"videos":[67],"realistic":[69],"varied":[71],"conditions.":[73],"Hand":[74],"landmarks":[75,119],"labels":[78],"were":[79],"extracted":[80,120],"pretrained":[83],"pose":[85],"estimation":[86],"model":[87],"semi-automated":[90],"detection":[92,114],"algorithm.":[93],"We":[94],"discuss":[95,143],"challenges":[97],"encountered":[98],"collection":[101],"alignment":[104],"process":[105],"across":[106],"different":[107],"modalities.":[108],"Additionally,":[109],"we":[110,124],"describe":[111],"our":[112],"method":[115],"based":[116],"on":[117,128],"videos.":[122],"Finally,":[123],"present":[125],"experimental":[126],"results":[127],"both":[129],"audio-only":[130],"audio-visual":[132],"transcription":[134],"PianoVAM":[137],"for":[139],"benchmarking":[140],"purposes":[141],"other":[144],"potential":[145],"applications.":[146]},"counts_by_year":[],"updated_date":"2025-12-05T23:25:22.460635","created_date":"2025-12-05T00:00:00"}
