{"id":"https://openalex.org/W4412119716","doi":"https://doi.org/10.1145/3719384.3719430","title":"Emotion-Guided Image to Music Generation","display_name":"Emotion-Guided Image to Music Generation","publication_year":2024,"publication_date":"2024-12-14","ids":{"openalex":"https://openalex.org/W4412119716","doi":"https://doi.org/10.1145/3719384.3719430"},"language":"en","primary_location":{"id":"doi:10.1145/3719384.3719430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719384.3719430","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719384.3719430","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 7th Artificial Intelligence and Cloud Computing Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3719384.3719430","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103171388","display_name":"Souraja Kundu","orcid":"https://orcid.org/0009-0008-7115-0926"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Souraja Kundu","raw_affiliation_strings":["Department of Electronics and Electrical Engineering, Indian Institute of Technology Guwahati, Guwahati, Assam, India"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Electrical Engineering, Indian Institute of Technology Guwahati, Guwahati, Assam, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103329310","display_name":"Satbir Singh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Saket Singh","raw_affiliation_strings":["Visual Solutions Team, Samsung R&amp;D Institute Noida, Noida, Uttar Pradesh, India"],"affiliations":[{"raw_affiliation_string":"Visual Solutions Team, Samsung R&amp;D Institute Noida, Noida, Uttar Pradesh, India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103202402","display_name":"Yuji Iwahori","orcid":"https://orcid.org/0000-0003-1016-1636"},"institutions":[{"id":"https://openalex.org/I184937672","display_name":"Chubu University","ror":"https://ror.org/02sps0775","country_code":"JP","type":"education","lineage":["https://openalex.org/I184937672"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuji Iwahori","raw_affiliation_strings":["Department of Computer Science, Chubu University, Nagoya, Aichi, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Chubu University, Nagoya, Aichi, Japan","institution_ids":["https://openalex.org/I184937672"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103171388"],"corresponding_institution_ids":["https://openalex.org/I1317621060"],"apc_list":null,"apc_paid":null,"fwci":0.2632,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60073242,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"323","last_page":"330"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6021651029586792},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4727257788181305},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3916531503200531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32072633504867554}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6021651029586792},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4727257788181305},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3916531503200531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32072633504867554}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3719384.3719430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719384.3719430","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719384.3719430","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 7th Artificial Intelligence and Cloud Computing Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3719384.3719430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719384.3719430","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719384.3719430","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 7th Artificial Intelligence and Cloud Computing Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321555","display_name":"Chubu University","ror":"https://ror.org/02sps0775"},{"id":"https://openalex.org/F4320332195","display_name":"Samsung","ror":"https://ror.org/04w3jy968"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412119716.pdf","grobid_xml":"https://content.openalex.org/works/W4412119716.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W185767063","https://openalex.org/W1595049743","https://openalex.org/W2006556239","https://openalex.org/W2064675550","https://openalex.org/W2072007696","https://openalex.org/W2112796928","https://openalex.org/W2138621811","https://openalex.org/W2169896877","https://openalex.org/W2182791303","https://openalex.org/W2183341477","https://openalex.org/W2214864214","https://openalex.org/W2417429787","https://openalex.org/W2559655401","https://openalex.org/W2604555320","https://openalex.org/W2752796333","https://openalex.org/W2753738274","https://openalex.org/W2956015785","https://openalex.org/W2963978237","https://openalex.org/W3086550980","https://openalex.org/W3119862448","https://openalex.org/W3135367836","https://openalex.org/W3193482276","https://openalex.org/W3209712801","https://openalex.org/W4200531289","https://openalex.org/W4226453196","https://openalex.org/W4287802874","https://openalex.org/W4288391450","https://openalex.org/W4308613617","https://openalex.org/W4313495875","https://openalex.org/W4317791948","https://openalex.org/W4318002256","https://openalex.org/W4382053399","https://openalex.org/W4387698232","https://openalex.org/W4389371446","https://openalex.org/W4392224323","https://openalex.org/W6739349799","https://openalex.org/W6739901393","https://openalex.org/W6755257315","https://openalex.org/W6776218486","https://openalex.org/W6849105126"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Generating":[0],"music":[1,10,37],"from":[2,95],"images":[3],"can":[4],"enhance":[5],"various":[6],"applications,":[7],"including":[8],"background":[9],"for":[11,56],"photo":[12],"slideshows,":[13],"social":[14],"media":[15],"experiences,":[16],"and":[17,85,107,138],"video":[18],"creation.":[19],"This":[20],"paper":[21],"presents":[22],"an":[23],"emotion-guided":[24],"image-to-music":[25],"generation":[26],"framework":[27],"that":[28,38,51],"leverages":[29],"the":[30,41,59,123],"Valence-Arousal":[31],"(VA)":[32],"emotional":[33,42,57,71,93],"space":[34],"to":[35,68,89,104],"produce":[36],"aligns":[39],"with":[40],"tone":[43],"of":[44],"a":[45,64,76,115],"given":[46],"image.":[47],"Unlike":[48],"previous":[49],"models":[50],"rely":[52],"on":[53,114],"contrastive":[54],"learning":[55],"consistency,":[58],"proposed":[60,124],"approach":[61],"directly":[62],"integrates":[63],"VA":[65],"loss":[66,139],"function":[67],"enable":[69],"accurate":[70],"alignment.":[72],"The":[73],"model":[74],"employs":[75],"CNN-Transformer":[77],"architecture,":[78],"featuring":[79],"pre-trained":[80],"CNN":[81],"image":[82],"feature":[83],"extractors":[84],"three":[86],"Transformer":[87,99],"encoders":[88],"capture":[90],"complex,":[91],"high-level":[92],"features":[94,103],"MIDI":[96,110],"music.":[97],"Three":[98],"decoders":[100],"refine":[101],"these":[102],"generate":[105],"musically":[106],"emotionally":[108,118],"consistent":[109],"sequences.":[111],"Experimental":[112],"results":[113],"newly":[116],"curated":[117],"paired":[119],"image-MIDI":[120],"dataset":[121],"demonstrate":[122],"model's":[125],"superior":[126],"performance":[127],"across":[128],"metrics":[129],"such":[130],"as":[131],"Polyphony":[132],"Rate,":[133],"Pitch":[134],"Entropy,":[135],"Groove":[136],"Consistency,":[137],"convergence.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
