{"id":"https://openalex.org/W4416386427","doi":"https://doi.org/10.20965/jaciii.2025.p1417","title":"Interactive Image Caption Generation Reflecting User Intent from Trace Using a Diffusion Language Model","display_name":"Interactive Image Caption Generation Reflecting User Intent from Trace Using a Diffusion Language Model","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W4416386427","doi":"https://doi.org/10.20965/jaciii.2025.p1417"},"language":"en","primary_location":{"id":"doi:10.20965/jaciii.2025.p1417","is_oa":true,"landing_page_url":"https://doi.org/10.20965/jaciii.2025.p1417","pdf_url":null,"source":{"id":"https://openalex.org/S4511983","display_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","issn_l":"1343-0130","issn":["1343-0130","1883-8014"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4324309662","host_organization_name":"Fuji Technology Press Ltd.","host_organization_lineage":["https://openalex.org/P4324309662"],"host_organization_lineage_names":["Fuji Technology Press Ltd."],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.20965/jaciii.2025.p1417","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Satoko Hirano","orcid":null},"institutions":[{"id":"https://openalex.org/I114030911","display_name":"Otsuka (Japan)","ror":"https://ror.org/013k5y296","country_code":"JP","type":"company","lineage":["https://openalex.org/I114030911"]},{"id":"https://openalex.org/I26120043","display_name":"Ochanomizu University","ror":"https://ror.org/03599d813","country_code":"JP","type":"education","lineage":["https://openalex.org/I26120043"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoko Hirano","raw_affiliation_strings":["Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan","institution_ids":["https://openalex.org/I26120043","https://openalex.org/I114030911"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Editorial Office","orcid":null},"institutions":[{"id":"https://openalex.org/I114030911","display_name":"Otsuka (Japan)","ror":"https://ror.org/013k5y296","country_code":"JP","type":"company","lineage":["https://openalex.org/I114030911"]},{"id":"https://openalex.org/I26120043","display_name":"Ochanomizu University","ror":"https://ror.org/03599d813","country_code":"JP","type":"education","lineage":["https://openalex.org/I26120043"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Editorial Office","raw_affiliation_strings":["Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan","institution_ids":["https://openalex.org/I26120043","https://openalex.org/I114030911"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ichiro Kobayashi","orcid":"https://orcid.org/0000-0001-7789-475X"},"institutions":[{"id":"https://openalex.org/I114030911","display_name":"Otsuka (Japan)","ror":"https://ror.org/013k5y296","country_code":"JP","type":"company","lineage":["https://openalex.org/I114030911"]},{"id":"https://openalex.org/I26120043","display_name":"Ochanomizu University","ror":"https://ror.org/03599d813","country_code":"JP","type":"education","lineage":["https://openalex.org/I26120043"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ichiro Kobayashi","raw_affiliation_strings":["Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan"],"raw_orcid":"https://orcid.org/0000-0001-7789-475X","affiliations":[{"raw_affiliation_string":"Ochanomizu University, 2-1-1 Otsuka, Bunkyo-ku, Tokyo 112-8610, Japan","institution_ids":["https://openalex.org/I26120043","https://openalex.org/I114030911"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31048781,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"29","issue":"6","first_page":"1417","last_page":"1426"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0052999998442828655,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.8489999771118164},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8234000205993652},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5605999827384949},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5001000165939331},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.491100013256073},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4675999879837036},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4480000138282776},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.44760000705718994}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8985000252723694},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.8489999771118164},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8234000205993652},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5605999827384949},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5595999956130981},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5001000165939331},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.491100013256073},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4675999879837036},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4480000138282776},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.41679999232292175},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.397599995136261},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3955000042915344},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36239999532699585},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C2775997480","wikidata":"https://www.wikidata.org/wiki/Q586277","display_name":"Degree (music)","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.20965/jaciii.2025.p1417","is_oa":true,"landing_page_url":"https://doi.org/10.20965/jaciii.2025.p1417","pdf_url":null,"source":{"id":"https://openalex.org/S4511983","display_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","issn_l":"1343-0130","issn":["1343-0130","1883-8014"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4324309662","host_organization_name":"Fuji Technology Press Ltd.","host_organization_lineage":["https://openalex.org/P4324309662"],"host_organization_lineage_names":["Fuji Technology Press Ltd."],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.20965/jaciii.2025.p1417","is_oa":true,"landing_page_url":"https://doi.org/10.20965/jaciii.2025.p1417","pdf_url":null,"source":{"id":"https://openalex.org/S4511983","display_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","issn_l":"1343-0130","issn":["1343-0130","1883-8014"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4324309662","host_organization_name":"Fuji Technology Press Ltd.","host_organization_lineage":["https://openalex.org/P4324309662"],"host_organization_lineage_names":["Fuji Technology Press Ltd."],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Advanced Computational Intelligence and Intelligent Informatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1905882502","https://openalex.org/W2101105183","https://openalex.org/W3095670406","https://openalex.org/W3153469116","https://openalex.org/W4312933868","https://openalex.org/W4385571393","https://openalex.org/W4385571755","https://openalex.org/W4393160202","https://openalex.org/W4403943512","https://openalex.org/W4404820294"],"related_works":[],"abstract_inverted_index":{"This":[0],"study":[1],"proposes":[2],"an":[3],"image":[4],"captioning":[5],"method":[6,100],"designed":[7],"to":[8,72],"incorporate":[9],"user-specific":[10],"explanatory":[11],"intentions":[12],"into":[13],"the":[14,20,24,35,38,46,51,60,86,90,93,98,117,124],"generated":[15,125],"text,":[16],"as":[17],"signaled":[18],"by":[19,42,58],"user\u2019s":[21],"trace":[22,118],"on":[23,63,85],"image.":[25],"We":[26],"extract":[27],"areas":[28],"of":[29,34,40,53,89],"interest":[30,54],"from":[31,116],"dense":[32],"sections":[33],"trace,":[36],"determine":[37],"order":[39],"explanations":[41],"tracking":[43],"changes":[44],"in":[45,55,75,123],"pen-tip":[47],"coordinates,":[48],"and":[49,111,119],"assess":[50],"degree":[52],"each":[56],"area":[57],"analyzing":[59],"time":[61],"spent":[62],"them.":[64],"Additionally,":[65],"a":[66,76],"diffusion":[67],"language":[68],"model":[69],"is":[70],"utilized":[71],"generate":[73],"sentences":[74],"non-autoregressive":[77],"manner,":[78],"allowing":[79],"control":[80],"over":[81],"sentence":[82],"length":[83],"based":[84],"temporal":[87],"data":[88],"trace.":[91],"In":[92],"actual":[94],"caption":[95],"generation":[96],"task,":[97],"proposed":[99],"achieved":[101],"higher":[102],"string":[103],"similarity":[104],"than":[105],"conventional":[106],"methods,":[107],"including":[108],"autoregressive":[109],"models,":[110],"successfully":[112],"captured":[113],"user":[114],"intent":[115],"faithfully":[120],"reflected":[121],"it":[122],"text.":[126]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-19T00:00:00"}
