{"id":"https://openalex.org/W3161635175","doi":"https://doi.org/10.1109/icpr48806.2021.9412698","title":"Let's Play Music: Audio-Driven Performance Video Generation","display_name":"Let's Play Music: Audio-Driven Performance Video Generation","publication_year":2021,"publication_date":"2021-01-10","ids":{"openalex":"https://openalex.org/W3161635175","doi":"https://doi.org/10.1109/icpr48806.2021.9412698","mag":"3161635175"},"language":"en","primary_location":{"id":"doi:10.1109/icpr48806.2021.9412698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101589497","display_name":"Hao Zhu","orcid":"https://orcid.org/0000-0003-2155-1488"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Zhu","raw_affiliation_strings":["Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), CASIA, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), CASIA, Beijing, China","institution_ids":["https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100421473","display_name":"Yi Li","orcid":"https://orcid.org/0000-0002-2856-7290"},"institutions":[{"id":"https://openalex.org/I4210097554","display_name":"Center for Excellence in Brain Science and Intelligence Technology","ror":"https://ror.org/00vpwhm04","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210097554"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Li","raw_affiliation_strings":["Center for Excellence in Brain Science and Intelligence Technology, CAS, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Excellence in Brain Science and Intelligence Technology, CAS, Beijing, China","institution_ids":["https://openalex.org/I4210097554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091454702","display_name":"Feixia Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feixia Zhu","raw_affiliation_strings":["Anhui Provincial Key Laboratory of Multimodal Cognitive Computation, School of Computer Science and Technology, Anhui University"],"affiliations":[{"raw_affiliation_string":"Anhui Provincial Key Laboratory of Multimodal Cognitive Computation, School of Computer Science and Technology, Anhui University","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017558628","display_name":"Aihua Zheng","orcid":"https://orcid.org/0000-0002-9820-4743"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aihua Zheng","raw_affiliation_strings":["Anhui Provincial Key Laboratory of Multimodal Cognitive Computation, School of Computer Science and Technology, Anhui University"],"affiliations":[{"raw_affiliation_string":"Anhui Provincial Key Laboratory of Multimodal Cognitive Computation, School of Computer Science and Technology, Anhui University","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112749024","display_name":"Ran He","orcid":"https://orcid.org/0000-0002-3807-991X"},"institutions":[{"id":"https://openalex.org/I4210097554","display_name":"Center for Excellence in Brain Science and Intelligence Technology","ror":"https://ror.org/00vpwhm04","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210097554"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ran He","raw_affiliation_strings":["Center for Excellence in Brain Science and Intelligence Technology, CAS, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Excellence in Brain Science and Intelligence Technology, CAS, Beijing, China","institution_ids":["https://openalex.org/I4210097554"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101589497"],"corresponding_institution_ids":["https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":0.9142,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.73373901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3574","last_page":"3581"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8455034494400024},{"id":"https://openalex.org/keywords/inter-frame","display_name":"Inter frame","score":0.6153199076652527},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4742892384529114},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4045850932598114},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40420976281166077},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3731119632720947},{"id":"https://openalex.org/keywords/reference-frame","display_name":"Reference frame","score":0.23513653874397278}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8455034494400024},{"id":"https://openalex.org/C39394851","wikidata":"https://www.wikidata.org/wiki/Q921594","display_name":"Inter frame","level":4,"score":0.6153199076652527},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4742892384529114},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4045850932598114},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40420976281166077},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3731119632720947},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.23513653874397278},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr48806.2021.9412698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3254430106","display_name":null,"funder_award_id":"61976002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3666211740","display_name":null,"funder_award_id":"JQ18017","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G5276586959","display_name":null,"funder_award_id":"KJ2019A0033","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320334897","display_name":"Natural Science Foundation of Anhui Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1677182931","https://openalex.org/W1686810756","https://openalex.org/W1901129140","https://openalex.org/W1924770834","https://openalex.org/W2080489767","https://openalex.org/W2099471712","https://openalex.org/W2133665775","https://openalex.org/W2157331557","https://openalex.org/W2331128040","https://openalex.org/W2519091744","https://openalex.org/W2556783285","https://openalex.org/W2738406145","https://openalex.org/W2780124704","https://openalex.org/W2782422271","https://openalex.org/W2790649793","https://openalex.org/W2798714868","https://openalex.org/W2891485065","https://openalex.org/W2899129842","https://openalex.org/W2904622387","https://openalex.org/W2944294033","https://openalex.org/W2949382160","https://openalex.org/W2952489094","https://openalex.org/W2962730651","https://openalex.org/W2962819541","https://openalex.org/W2962982136","https://openalex.org/W2963066677","https://openalex.org/W2963073614","https://openalex.org/W2963290645","https://openalex.org/W2963516695","https://openalex.org/W2963600167","https://openalex.org/W2963807156","https://openalex.org/W2963841322","https://openalex.org/W2964002510","https://openalex.org/W2964015378","https://openalex.org/W2981905048","https://openalex.org/W2984529706","https://openalex.org/W2989607414","https://openalex.org/W2999894541","https://openalex.org/W2999966482","https://openalex.org/W3005171070","https://openalex.org/W3099284785","https://openalex.org/W3102619627","https://openalex.org/W3154807520","https://openalex.org/W4288627824","https://openalex.org/W4320013936","https://openalex.org/W6639824700","https://openalex.org/W6670554727","https://openalex.org/W6702130928","https://openalex.org/W6726873649","https://openalex.org/W6736965957","https://openalex.org/W6738824914","https://openalex.org/W6744109851","https://openalex.org/W6753084572","https://openalex.org/W6753914649","https://openalex.org/W6770208262","https://openalex.org/W6772994595"],"related_works":["https://openalex.org/W1971496130","https://openalex.org/W1583975326","https://openalex.org/W1523192573","https://openalex.org/W2160100014","https://openalex.org/W2067265395","https://openalex.org/W2049101682","https://openalex.org/W2386802389","https://openalex.org/W2291726614","https://openalex.org/W4247021036","https://openalex.org/W2144951891"],"abstract_inverted_index":{"We":[0],"propose":[1,51,93,123],"a":[2,18,21,26,33,52,87,102,124],"new":[3],"task":[4,35],"named":[5],"Audio-driven":[6],"Performance":[7],"Video":[8],"Generation":[9],"(APVG),":[10],"which":[11],"aims":[12],"to":[13,36,55,94,99,116,129],"synthesize":[14],"the":[15,38,77,96,107,160],"video":[16,61,155],"of":[17,82,162],"person":[19],"playing":[20],"certain":[22],"instrument":[23],"guided":[24],"by":[25,75],"given":[27,63,88],"music":[28,89],"audio":[29,45],"clip.":[30],"It":[31],"is":[32,114],"challenging":[34],"generate":[37,56,117],"high-dimensional":[39],"temporal":[40,137,150],"consistent":[41],"videos":[42,79],"from":[43,62,86,119],"low-dimensional":[44],"modality.":[46],"In":[47],"this":[48],"paper,":[49],"we":[50,66,92,122],"multi-staged":[53],"framework":[54],"realistic":[57],"and":[58,71,80,84,135,146],"synchronized":[59],"performance":[60],"music.":[64],"Firstly,":[65],"provide":[67],"both":[68,131],"global":[69],"appearance":[70],"local":[72],"spatial":[73,111],"information":[74,112,134],"generating":[76],"coarse":[78],"keypoints":[81,98],"body":[83],"hands":[85],"respectively.":[90],"Then,":[91],"transform":[95],"generated":[97],"heatmap":[100,108],"via":[101,142],"differentiable":[103],"space":[104],"transformer,":[105],"since":[106],"provides":[109],"more":[110],"but":[113],"harder":[115],"directly":[118],"audio.":[120],"Finally,":[121],"Structured":[125],"Temporal":[126],"UNet":[127],"(STU)":[128],"extract":[130],"intra-frame":[132],"structured":[133],"interframe":[136],"consistency.":[138],"They":[139],"are":[140],"obtained":[141],"graph-based":[143],"structure":[144],"module,":[145],"CNN-GRU":[147],"based":[148],"high-level":[149],"module":[151],"respectively":[152],"for":[153],"final":[154],"generation.":[156],"Comprehensive":[157],"experiments":[158],"validate":[159],"effectiveness":[161],"our":[163],"proposed":[164],"framework.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
