{"id":"https://openalex.org/W4408311533","doi":"https://doi.org/10.1142/s0218001425580017","title":"SUMMR: A Unified Multimodal Representation Framework for Songs","display_name":"SUMMR: A Unified Multimodal Representation Framework for Songs","publication_year":2025,"publication_date":"2025-03-11","ids":{"openalex":"https://openalex.org/W4408311533","doi":"https://doi.org/10.1142/s0218001425580017"},"language":"en","primary_location":{"id":"doi:10.1142/s0218001425580017","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001425580017","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119268752","display_name":"Lei Ye","orcid":"https://orcid.org/0009-0003-1093-6363"},"institutions":[{"id":"https://openalex.org/I174385955","display_name":"Hefei Normal University","ror":"https://ror.org/01b64k086","country_code":"CN","type":"education","lineage":["https://openalex.org/I174385955"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Ye","raw_affiliation_strings":["School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I174385955"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015651736","display_name":"Bing Shen","orcid":"https://orcid.org/0000-0001-7263-4748"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bing Shen","raw_affiliation_strings":["School of Arts & Communication, Beijing Normal University Beijing, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Arts & Communication, Beijing Normal University Beijing, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I25254941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076040901","display_name":"Yu Su","orcid":"https://orcid.org/0000-0002-7950-4919"},"institutions":[{"id":"https://openalex.org/I174385955","display_name":"Hefei Normal University","ror":"https://ror.org/01b64k086","country_code":"CN","type":"education","lineage":["https://openalex.org/I174385955"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Su","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Hefei Normal University Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Hefei Normal University Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I174385955"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373698","display_name":"Xiao Dong Chen","orcid":"https://orcid.org/0000-0002-0150-0491"},"institutions":[{"id":"https://openalex.org/I174385955","display_name":"Hefei Normal University","ror":"https://ror.org/01b64k086","country_code":"CN","type":"education","lineage":["https://openalex.org/I174385955"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Chen","raw_affiliation_strings":["School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I174385955"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113900216","display_name":"Yi Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137491","display_name":"National Science Center","ror":"https://ror.org/03ha2q922","country_code":"PL","type":"funder","lineage":["https://openalex.org/I4210137491"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Yi Gong","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I4210137491"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101450478","display_name":"Yifei Zhou","orcid":"https://orcid.org/0000-0003-1207-1810"},"institutions":[{"id":"https://openalex.org/I174385955","display_name":"Hefei Normal University","ror":"https://ror.org/01b64k086","country_code":"CN","type":"education","lineage":["https://openalex.org/I174385955"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifei Zhou","raw_affiliation_strings":["School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Music, Hefei Normal University, Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I174385955"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100734933","display_name":"Jun L\u00fc","orcid":"https://orcid.org/0000-0003-2221-1872"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"JunYu Lu","raw_affiliation_strings":["School of Big Data, University of Science and Technology of China, Hefei, P.\u00a0R.\u00a0China"],"affiliations":[{"raw_affiliation_string":"School of Big Data, University of Science and Technology of China, Hefei, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I126520041","https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5119268752"],"corresponding_institution_ids":["https://openalex.org/I174385955"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04720134,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"39","issue":"12","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9708999991416931,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6450738310813904},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6214895248413086},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.592792272567749},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3946779668331146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6450738310813904},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6214895248413086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.592792272567749},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3946779668331146},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218001425580017","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001425580017","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6596708513","display_name":null,"funder_award_id":"sU20A20229","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W78455842","https://openalex.org/W180805716","https://openalex.org/W1926522780","https://openalex.org/W1965419851","https://openalex.org/W1980867644","https://openalex.org/W2031767620","https://openalex.org/W2070269130","https://openalex.org/W2107430826","https://openalex.org/W2133824856","https://openalex.org/W2328176404","https://openalex.org/W2405656250","https://openalex.org/W2478051194","https://openalex.org/W2808787330","https://openalex.org/W2902758295","https://openalex.org/W2914327968","https://openalex.org/W2957007523","https://openalex.org/W2963062788","https://openalex.org/W2964770898","https://openalex.org/W2974699933","https://openalex.org/W2978329087","https://openalex.org/W3011727199","https://openalex.org/W3195659928","https://openalex.org/W3201298926","https://openalex.org/W4232430330","https://openalex.org/W4238344759","https://openalex.org/W4313334373","https://openalex.org/W4368227694","https://openalex.org/W4372266552"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0],"understanding":[1,57,180],"and":[2,38,49,58,130,181,185],"representation":[3,59,82,138,182],"of":[4,44,60,103,139,168,177,183,193],"songs":[5,45],"is":[6,88,95,121,147],"a":[7,25,79,92,110,116,136,142],"crucial":[8],"issue":[9],"in":[10,19,76,196],"music":[11,21,32,132,163],"platforms,":[12],"as":[13],"it":[14],"can":[15,150],"facilitate":[16],"numerous":[17],"applications":[18],"the":[20,31,42,56,63,101,126,153,171,175],"field.":[22],"Songs":[23],"are":[24,107,158],"common":[26],"multimodal":[27,48,81],"art":[28],"form":[29],"within":[30],"domain,":[33],"achieving":[34],"rich":[35],"musical":[36],"connotations":[37],"strong":[39],"expressiveness.":[40],"However,":[41],"data":[43,106],"exhibit":[46],"obvious":[47],"heterogeneous":[50],"characteristics,":[51],"presenting":[52],"significant":[53],"challenges":[54,71],"to":[55,69],"songs.":[61,140],"Regrettably,":[62],"current":[64],"methods":[65],"do":[66],"not":[67],"respond":[68],"these":[70],"effectively.":[72],"To":[73],"this":[74,77],"end,":[75],"study,":[78],"unified":[80,111,137],"framework":[83,94],"for":[84,179],"songs,":[85,169,184],"namely":[86],"SUMMR,":[87],"proposed.":[89],"Specifically,":[90],"first,":[91],"two-layer":[93],"put":[96],"forward.":[97],"In":[98,113],"embedding":[99],"layer,":[100,115],"features":[102],"different":[104],"modalities":[105],"embedded":[108],"into":[109],"space.":[112],"content":[114],"novel":[117],"cross-modal":[118,127],"attention":[119],"mechanism":[120],"designed,":[122],"which":[123,149],"effectively":[124,151],"capture":[125],"semantic":[128],"associations":[129],"deep":[131],"features,":[133],"thereby":[134],"obtaining":[135],"Then,":[141],"two-level":[143],"hierarchical":[144],"pre-training":[145],"algorithm":[146],"proposed,":[148],"lower":[152],"training":[154],"cost.":[155],"Finally,":[156],"experiments":[157],"conducted":[159],"on":[160],"two":[161],"typical":[162],"tasks":[164],"with":[165],"public":[166],"datasets":[167],"where":[170],"experimental":[172],"results":[173],"demonstrate":[174],"effectiveness":[176],"SUMMR":[178,189],"also":[186],"show":[187],"that":[188],"has":[190],"good":[191],"capability":[192],"being":[194],"fine-tuned":[195],"many":[197],"song-based":[198],"tasks.":[199]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
