{"id":"https://openalex.org/W4415708127","doi":"https://doi.org/10.1109/icme59968.2025.11210229","title":"Dialogue Director: Bridging the Gap in Dialogue Visualization for Multimodal Storytelling","display_name":"Dialogue Director: Bridging the Gap in Dialogue Visualization for Multimodal Storytelling","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708127","doi":"https://doi.org/10.1109/icme59968.2025.11210229"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11210229","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210229","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100402911","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-3895-5510"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Xiamen University,School of Film,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Film,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101479023","display_name":"Zilin Wang","orcid":"https://orcid.org/0000-0003-0210-5124"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zilin Wang","raw_affiliation_strings":["Xiamen University,School of Film,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Film,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100772862","display_name":"Liyan Chen","orcid":"https://orcid.org/0000-0001-5713-3814"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liyan Chen","raw_affiliation_strings":["Xiamen University,School of Film,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Film,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066879706","display_name":"Kunhong Liu","orcid":"https://orcid.org/0000-0002-1222-8876"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kunhong Liu","raw_affiliation_strings":["Xiamen University,School of Film,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Film,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056910392","display_name":"Juncong Lin","orcid":"https://orcid.org/0000-0001-6500-6655"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juncong Lin","raw_affiliation_strings":["Xiamen University,School of Informatics,Xiamen,China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Xiamen,China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100402911"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33745278,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8676999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8676999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.016200000420212746,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.014499999582767487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/storyboard","display_name":"Storyboard","score":0.861299991607666},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.8371000289916992},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.7487999796867371},{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.6141999959945679},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5540000200271606},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5238999724388123},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.414900004863739}],"concepts":[{"id":"https://openalex.org/C2777080924","wikidata":"https://www.wikidata.org/wiki/Q334667","display_name":"Storyboard","level":2,"score":0.861299991607666},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.8371000289916992},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.7487999796867371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483000159263611},{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.6141999959945679},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5697000026702881},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5540000200271606},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5238999724388123},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.5159000158309937},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.414900004863739},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4097999930381775},{"id":"https://openalex.org/C2778583943","wikidata":"https://www.wikidata.org/wiki/Q846516","display_name":"Digital storytelling","level":2,"score":0.3693000078201294},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C2779754051","wikidata":"https://www.wikidata.org/wiki/Q2903135","display_name":"Interactive storytelling","level":4,"score":0.28119999170303345},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26159998774528503}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11210229","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210229","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2102166818","https://openalex.org/W2963047368","https://openalex.org/W3104780697","https://openalex.org/W3205612999","https://openalex.org/W4312933868","https://openalex.org/W4385574082","https://openalex.org/W4389574988","https://openalex.org/W4394593186","https://openalex.org/W4401991031","https://openalex.org/W4402979707","https://openalex.org/W4402980725","https://openalex.org/W4402982682","https://openalex.org/W4404965628","https://openalex.org/W4409366412","https://openalex.org/W4409367143","https://openalex.org/W4409368677","https://openalex.org/W4412915583","https://openalex.org/W4413158163"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,114],"AI-driven":[3],"storytelling":[4],"have":[5],"enhanced":[6],"video":[7],"generation":[8],"and":[9,32,71,80,91,101,120,128],"story":[10,132],"visualization.":[11,133],"However,":[12],"translating":[13],"dialogue-centric":[14],"scripts":[15,53],"into":[16,54],"coherent":[17],"storyboards":[18],"remains":[19],"a":[20,47,62],"significant":[21],"challenge":[22],"due":[23],"to":[24,94],"limited":[25],"script":[26,96,115],"detail,":[27],"inadequate":[28],"physical":[29,98,117],"context":[30,99],"understanding,":[31,97,119],"the":[33,126],"complexity":[34],"of":[35,130],"integrating":[36],"cinematic":[37,102,121],"principles.":[38],"To":[39],"address":[40],"these":[41],"challenges,":[42],"we":[43],"propose":[44],"Dialogue":[45,60,109],"Visualization,":[46],"novel":[48],"task":[49],"that":[50,108],"transforms":[51],"dialogue":[52],"dynamic,":[55],"multi-view":[56,92],"storyboards.":[57],"We":[58],"introduce":[59],"Director,":[61,69],"training-free":[63],"multimodal":[64,78],"framework":[65,75],"comprising":[66],"three":[67],"agents\u2013Script":[68],"Cinematographer,":[70],"Storyboard":[72],"Maker.":[73],"This":[74],"leverages":[76],"large":[77],"models":[79],"diffusion-based":[81],"architectures,":[82],"employing":[83],"techniques":[84],"such":[85],"as":[86],"Chain-of-Thought":[87],"reasoning,":[88],"Retrieval-Augmented":[89],"Generation,":[90],"synthesis":[93],"improve":[95],"comprehension,":[100],"knowledge":[103],"integration.":[104],"Experimental":[105],"results":[106],"demonstrate":[107],"Director":[110],"outperforms":[111],"state-of-the-art":[112],"methods":[113],"interpretation,":[116],"world":[118],"principle":[122],"application,":[123],"significantly":[124],"advancing":[125],"quality":[127],"controllability":[129],"dialogue-based":[131]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
