{"id":"https://openalex.org/W3130160131","doi":"https://doi.org/10.5220/0010314301960205","title":"Contextualise, Attend, Modulate and Tell: Visual Storytelling","display_name":"Contextualise, Attend, Modulate and Tell: Visual Storytelling","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3130160131","doi":"https://doi.org/10.5220/0010314301960205","mag":"3130160131"},"language":"en","primary_location":{"id":"doi:10.5220/0010314301960205","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0010314301960205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5220/0010314301960205","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016068746","display_name":"Zainy M. Malakan","orcid":"https://orcid.org/0000-0002-6980-0992"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]},{"id":"https://openalex.org/I199693650","display_name":"Umm al-Qura University","ror":"https://ror.org/01xjqrm90","country_code":"SA","type":"education","lineage":["https://openalex.org/I199693650"]}],"countries":["AU","SA"],"is_corresponding":true,"raw_author_name":"Zainy Malakan","raw_affiliation_strings":["Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---","Department of Information Science, Faculty of Computer Science and Information System, Umm Al-Qura University, Saudi Arabia, --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---","institution_ids":["https://openalex.org/I177877127"]},{"raw_affiliation_string":"Department of Information Science, Faculty of Computer Science and Information System, Umm Al-Qura University, Saudi Arabia, --- Select a Country ---","institution_ids":["https://openalex.org/I199693650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003668441","display_name":"Nayyer Aafaq","orcid":"https://orcid.org/0000-0003-2763-2094"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nayyer Aafaq","raw_affiliation_strings":["Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---","institution_ids":["https://openalex.org/I177877127"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023496317","display_name":"Ghulam Mubashar Hassan","orcid":"https://orcid.org/0000-0002-6636-8807"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ghulam Hassan","raw_affiliation_strings":["Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---","institution_ids":["https://openalex.org/I177877127"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089986388","display_name":"Ajmal Mian","orcid":"https://orcid.org/0000-0002-5206-3842"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ajmal Mian","raw_affiliation_strings":["Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, The University of Western Australia, Australia, --- Select a Country ---","institution_ids":["https://openalex.org/I177877127"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016068746"],"corresponding_institution_ids":["https://openalex.org/I177877127","https://openalex.org/I199693650"],"apc_list":null,"apc_paid":null,"fwci":1.3839,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80896209,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"196","last_page":"205"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13071","display_name":"Digital Storytelling and Education","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/3616","display_name":"Speech and Hearing"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T13071","display_name":"Digital Storytelling and Education","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/3616","display_name":"Speech and Hearing"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.7866067886352539},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.501805305480957},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39961785078048706},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.32411378622055054},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.2336568534374237},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.13941076397895813},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.057880699634552}],"concepts":[{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.7866067886352539},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.501805305480957},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39961785078048706},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32411378622055054},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.2336568534374237},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.13941076397895813},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.057880699634552}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.5220/0010314301960205","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0010314301960205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/7d498875-406d-44b1-945f-9343c42cd743","is_oa":true,"landing_page_url":"https://research-repository.uwa.edu.au/en/publications/7d498875-406d-44b1-945f-9343c42cd743","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Malakan, Z M, Aafaq, N, Hassan, G M & Mian, A 2021, Contextualise, Attend, Modulate and Tell : Visual Storytelling. in G M Farinella, P Radeva, J Braz & K Bouatouch (eds), Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications. vol. 5, Scitepress, pp. 196-205, 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, Vienna, Austria, 8/02/21. https://doi.org/10.5220/0010314301960205","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:openaire/7d498875-406d-44b1-945f-9343c42cd743","is_oa":true,"landing_page_url":"https://admin.research-repository.uwa.edu.au/en/publications/7d498875-406d-44b1-945f-9343c42cd743","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Malakan, Z M, Aafaq, N, Hassan, G M & Mian, A 2021, Contextualise, Attend, Modulate and Tell : Visual Storytelling. in G M Farinella, P Radeva, J Braz & K Bouatouch (eds), Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications. vol. 5, Scitepress, pp. 196-205, 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, Vienna, Austria, 8/02/21. https://doi.org/10.5220/0010314301960205","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.5220/0010314301960205","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0010314301960205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2884441370","https://openalex.org/W1536421369","https://openalex.org/W2082193010","https://openalex.org/W2086118318","https://openalex.org/W4319988281","https://openalex.org/W2993120730","https://openalex.org/W1765898938","https://openalex.org/W2970945984","https://openalex.org/W2355818213"],"abstract_inverted_index":{"Automatic":[0],"natural":[1],"language":[2,50,83,159],"description":[3],"of":[4,35,48,71,82,153,187],"visual":[5,65,76],"content":[6],"is":[7,37,172,191],"an":[8,137],"emerging":[9],"and":[10,55,75,78,99,121,133,167,181,217,221],"fast-growing":[11],"topic":[12],"that":[13,58,102,140,195,223],"has":[14],"attracted":[15],"extensive":[16],"research":[17],"attention":[18,138],"recently.":[19],"However,":[20],"different":[21],"from":[22,32],"typical":[23],"\u2018image":[24],"captioning\u2019":[25],"or":[26],"\u2018video":[27],"captioning\u2019,":[28],"coherent":[29],"story":[30,202],"generation":[31,44],"a":[33,38,93,130,158,198],"sequence":[34,110],"images":[36,156],"relatively":[39],"less":[40],"studied":[41],"problem.":[42],"Story":[43],"poses":[45],"the":[46,64,73,104,108,122,142,146,150,154,164,168,176,179,185,192,209],"challenges":[47],"diverse":[49],"style,":[51],"context":[52,74],"modeling,":[53],"coherence":[54,84],"latent":[56],"concepts":[57],"are":[59,126],"not":[60],"even":[61],"visible":[62],"in":[63,111],"content.":[66],"Contemporary":[67],"methods":[68],"fall":[69],"short":[70],"modeling":[72],"variance,":[77],"generate":[79],"stories":[80],"devoid":[81],"among":[85,107,145],"multiple":[86],"sentences.":[87],"To":[88,184],"this":[89,190],"end,":[90],"we":[91],"propose":[92],"novel":[94],"framework":[95],"Contextualize,":[96],"Attend,":[97],"Modulate":[98],"Tell":[100],"(CAMT)":[101],"models":[103],"temporal":[105],"relationship":[106],"image":[109,124],"forward":[112],"as":[113,115],"well":[114],"backward":[116],"direction.":[117],"The":[118],"contextual":[119],"information":[120],"regional":[123],"features":[125],"then":[127,134],"projected":[128],"into":[129,157],"joint":[131],"space":[132],"subjected":[135],"to":[136,174],"mechanism":[139],"captures":[141],"spatio-temporal":[143],"relationships":[144],"images.":[147],"Before":[148],"feeding":[149],"attentive":[151,165],"representations":[152],"input":[155,169],"model,":[160],"gated":[161],"modulation":[162,199],"between":[163,178],"representation":[166],"word":[170],"embeddings":[171],"performed":[173],"capture":[175],"interaction":[177],"inputs":[180],"their":[182],"context.":[183],"best":[186],"our":[188,206,224],"knowledge,":[189],"first":[193],"method":[194],"exploits":[196],"such":[197],"technique":[200],"for":[201],"generation.":[203],"We":[204],"evaluate":[205],"model":[207,226],"on":[208],"Visual":[210],"Storytelling":[211],"Dataset":[212],"(VIST)":[213],"employing":[214],"both":[215],"automatic":[216],"human":[218],"evaluation":[219],"measures":[220],"demonstrate":[222],"CAMT":[225],"achieves":[227],"better":[228],"performance":[229],"than":[230],"existing":[231],"baselines.":[232]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
