{"id":"https://openalex.org/W4390446634","doi":"https://doi.org/10.1145/3617233.3617246","title":"Retrieved Generative Captioning for Medical Images","display_name":"Retrieved Generative Captioning for Medical Images","publication_year":2023,"publication_date":"2023-09-20","ids":{"openalex":"https://openalex.org/W4390446634","doi":"https://doi.org/10.1145/3617233.3617246"},"language":"en","primary_location":{"id":"doi:10.1145/3617233.3617246","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617233.3617246","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617233.3617246","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"20th International Conference on Content-based Multimedia Indexing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3617233.3617246","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084209353","display_name":"Djamila Romaissa Beddiar","orcid":"https://orcid.org/0000-0002-1371-3881"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Djamila Romaissa Beddiar","raw_affiliation_strings":["Center for machine vision and signal analysis, university of Oulu, Finland"],"affiliations":[{"raw_affiliation_string":"Center for machine vision and signal analysis, university of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068812101","display_name":"Mourad Oussalah","orcid":"https://orcid.org/0000-0002-4422-8723"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Mourad Oussalah","raw_affiliation_strings":["Center for machine vision and signal analysis, university of Oulu; Faculty of Medicine, University of Oulu, Finland","Center for machine vision and signal analysis, university of Oulu"],"affiliations":[{"raw_affiliation_string":"Center for machine vision and signal analysis, university of Oulu; Faculty of Medicine, University of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]},{"raw_affiliation_string":"Center for machine vision and signal analysis, university of Oulu","institution_ids":["https://openalex.org/I98381234"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059580063","display_name":"Tapio Sepp\u00e4nen","orcid":"https://orcid.org/0000-0002-3963-0750"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Tapio Seppanen","raw_affiliation_strings":["Center for machine vision and signal analysis, university of Oulu, Finland"],"affiliations":[{"raw_affiliation_string":"Center for machine vision and signal analysis, university of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5084209353"],"corresponding_institution_ids":["https://openalex.org/I98381234"],"apc_list":null,"apc_paid":null,"fwci":0.2456,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.55655789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"48","last_page":"54"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9739999771118164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9888587594032288},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8455548882484436},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.6406860947608948},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6379141211509705},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6375402212142944},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.506946325302124},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.48432743549346924},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.4834052622318268},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48332902789115906},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4747990667819977},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.4640326499938965},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45876118540763855},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4220786392688751},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32175901532173157}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9888587594032288},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8455548882484436},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.6406860947608948},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6379141211509705},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6375402212142944},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.506946325302124},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48432743549346924},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.4834052622318268},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48332902789115906},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4747990667819977},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.4640326499938965},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45876118540763855},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4220786392688751},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32175901532173157},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3617233.3617246","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617233.3617246","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617233.3617246","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"20th International Conference on Content-based Multimedia Indexing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3617233.3617246","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617233.3617246","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617233.3617246","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"20th International Conference on Content-based Multimedia Indexing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390446634.pdf","grobid_xml":"https://content.openalex.org/works/W4390446634.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2897980926","https://openalex.org/W2963084599","https://openalex.org/W2963967185","https://openalex.org/W3126988965","https://openalex.org/W3154326567","https://openalex.org/W4213315016","https://openalex.org/W4308788593","https://openalex.org/W4327780528"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W4238433571","https://openalex.org/W3009270862","https://openalex.org/W2967848559","https://openalex.org/W4299831724"],"abstract_inverted_index":{"Understanding":[0],"the":[1,50,95,149,153,157,162,167,181,184,191,200,215,231,245,251,255,264,270,285,291,304,308],"content":[2],"of":[3,18,69,117,180,205,242,282],"medical":[4,32,51,128,144,232],"images":[5],"and":[6,24,60,123,187,222,248,274,288,311],"mapping":[7],"it":[8],"into":[9],"text":[10],"is":[11,29,209,269],"a":[12,37,103,115,118,124,239,279],"very":[13],"trending":[14],"topic":[15],"in":[16,40,49,112],"intersection":[17],"two":[19],"main":[20],"domains;":[21],"computer":[22],"vision":[23],"natural":[25],"language":[26],"processing.":[27],"This":[28,170],"known":[30],"as":[31],"image":[33,64,129],"captioning,":[34],"which":[35,97,160,236],"plays":[36],"vital":[38],"role":[39],"developing":[41],"automatic":[42],"systems":[43,85],"for":[44,56,63,127,142,190,235,244,250,254,263,284,290],"diagnosis":[45,84],"purposes.":[46],"Recent":[47],"research":[48],"field":[52],"provided":[53],"promising":[54],"results":[55,260,297,306],"both":[57,207],"deep-learning":[58,120],"based":[59,121],"retrieval-based":[61,125,158,201],"models":[62,208],"captioning.":[65,130],"However,":[66],"each":[67,213],"one":[68],"them":[70],"has":[71],"its":[72],"own":[73],"drawbacks,":[74],"that":[75,195],"can":[76,105],"be":[77,99,300,313],"overcome":[78],"if":[79],"combined.":[80],"In":[81,107],"addition,":[82],"existing":[83],"are":[86],"still":[87],"not":[88],"able":[89],"to":[90,101,138,156,175,299],"provide":[91],"enough":[92],"explanation":[93],"about":[94],"findings,":[96],"might":[98],"similar":[100,164],"what":[102],"physician":[104],"deliver.":[106],"this":[108,113],"regard,":[109],"we":[110,132,147,237,277],"present":[111],"paper":[114],"combination":[116],"generative":[119,154,185],"method":[122],"model":[126,137,155,229,266],"First,":[131],"train":[133],"an":[134],"attention-based":[135],"encoder-decoder":[136],"generate":[139,176],"new":[140],"captions":[141],"given":[143],"images.":[145],"Then,":[146],"fit":[148],"generated":[150,221,273],"caption":[151,165,182,194,216,268],"from":[152,166],"model,":[159],"retrieves":[161],"most":[163,177,192],"training":[168],"database.":[169],"multi-stage":[171,256],"approach":[172],"allows":[173],"us":[174],"important":[178],"words":[179,198],"(with":[183,199],"model)":[186],"then":[188],"search":[189],"close":[193],"includes":[196],"such":[197],"model).":[202],"Another":[203],"way":[204],"combining":[206],"by":[210],"selecting":[211],"at":[212],"time":[214],"with":[217],"highest":[218],"score":[219,241],"among":[220,272],"retrieved":[223],"captions.":[224],"We":[225],"evaluate":[226],"our":[227,296],"proposed":[228],"on":[230,307],"ROCO":[233],"dataset":[234,310],"achieved":[238,262],"BLEU-4":[240,280],"07.89":[243],"radiology":[246,286],"class":[247,287],"03.19":[249],"out-of-class":[252,292],"data,":[253],"model.":[257],"Similarly,":[258],"best":[259,271],"were":[261],"fused":[265],"(predicted":[267],"retrieved)":[275],"where":[276],"obtain":[278],"values":[281],"18.61":[283],"13.28":[289],"data.":[293],"Even":[294],"though":[295],"seem":[298],"low,":[301],"they":[302],"outperformed":[303],"state-of-the-art":[305],"same":[309],"could":[312],"further":[314],"improved.":[315]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
