{"id":"https://openalex.org/W7124982761","doi":"https://doi.org/10.1109/cbmi66578.2025.11339280","title":"Media Search: A Multi-Stage Image Retrieval Framework with Enriched Image Captioning","display_name":"Media Search: A Multi-Stage Image Retrieval Framework with Enriched Image Captioning","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W7124982761","doi":"https://doi.org/10.1109/cbmi66578.2025.11339280"},"language":null,"primary_location":{"id":"doi:10.1109/cbmi66578.2025.11339280","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cbmi66578.2025.11339280","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047059451","display_name":"Ayse Vildan Nurdag","orcid":null},"institutions":[{"id":"https://openalex.org/I2799978770","display_name":"X-Fab (Germany)","ror":"https://ror.org/030bh9196","country_code":"DE","type":"company","lineage":["https://openalex.org/I2799978770"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Ay\u015fe Vildan Nurda\u011f","raw_affiliation_strings":["Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2799978770"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122974500","display_name":"Mete Mert Birdal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210154164","display_name":"Turkcell (Turkey)","ror":"https://ror.org/051vt5y84","country_code":"TR","type":"company","lineage":["https://openalex.org/I4210154164"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Mete Mert Birdal","raw_affiliation_strings":["Turkcell Technology,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"Turkcell Technology,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I4210154164"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123420125","display_name":"Yusuf Yaz\u0131c\u0131","orcid":null},"institutions":[{"id":"https://openalex.org/I4210154164","display_name":"Turkcell (Turkey)","ror":"https://ror.org/051vt5y84","country_code":"TR","type":"company","lineage":["https://openalex.org/I4210154164"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Yusuf Yaz\u0131c\u0131","raw_affiliation_strings":["Turkcell Technology,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"Turkcell Technology,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I4210154164"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123403385","display_name":"Bar\u0131\u015f \u00d6zcan","orcid":null},"institutions":[{"id":"https://openalex.org/I2799978770","display_name":"X-Fab (Germany)","ror":"https://ror.org/030bh9196","country_code":"DE","type":"company","lineage":["https://openalex.org/I2799978770"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bar\u0131\u015f \u00d6zcan","raw_affiliation_strings":["Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2799978770"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074858843","display_name":"Erkut Ar\u0131can","orcid":"https://orcid.org/0000-0003-4528-3203"},"institutions":[{"id":"https://openalex.org/I2799978770","display_name":"X-Fab (Germany)","ror":"https://ror.org/030bh9196","country_code":"DE","type":"company","lineage":["https://openalex.org/I2799978770"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Erkut Ar\u0131can","raw_affiliation_strings":["Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"Bah&#x00E7;e&#x015F;ehir University,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2799978770"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5047059451"],"corresponding_institution_ids":["https://openalex.org/I2799978770"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.7001375,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.03180000185966492,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.016100000590085983,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8513000011444092},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.7001000046730042},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.546500027179718},{"id":"https://openalex.org/keywords/automatic-image-annotation","display_name":"Automatic image annotation","score":0.4749999940395355},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.47290000319480896},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.40070000290870667},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.37380000948905945},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.35199999809265137}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8513000011444092},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7936000227928162},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.7001000046730042},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5809999704360962},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.546500027179718},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.4749999940395355},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.47290000319480896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.444599986076355},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3797999918460846},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.37380000948905945},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C108170787","wikidata":"https://www.wikidata.org/wiki/Q3951828","display_name":"Agency (philosophy)","level":2,"score":0.32120001316070557},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cbmi66578.2025.11339280","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cbmi66578.2025.11339280","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2066134726","https://openalex.org/W2101105183","https://openalex.org/W2970982005","https://openalex.org/W4415795285"],"related_works":[],"abstract_inverted_index":{"Finding":[0],"appropriate":[1],"stock":[2],"images":[3],"remains":[4],"a":[5,18,29,76],"significant":[6],"challenge":[7],"for":[8],"media":[9],"agency":[10],"professionals.":[11],"In":[12,38],"particular,":[13],"retrieving":[14],"visuals":[15],"that":[16,31,95],"match":[17],"mental":[19],"design":[20],"concept\u0214without":[21],"prior":[22],"knowledge":[23],"of":[24],"the":[25,64,131],"image":[26,44,71,82],"pool's":[27],"contents\u0214is":[28],"problem":[30],"traditional":[32],"search":[33],"engines":[34],"struggle":[35],"to":[36,54,80,118],"address.":[37],"this":[39,96],"study,":[40],"we":[41,109,115],"propose":[42],"an":[43,90],"retrieval":[45,103],"solution":[46],"powered":[47],"by":[48],"large":[49],"language":[50],"models":[51,127],"(LLMs),":[52],"designed":[53],"help":[55],"creative":[56],"teams":[57],"working":[58],"on":[59,120],"corporate":[60],"advertisements":[61],"efficiently":[62],"locate":[63],"most":[65],"relevant":[66],"visual":[67],"assets":[68],"from":[69],"internal":[70],"databases.":[72],"Our":[73],"approach":[74],"utilizes":[75],"state-of-the-art":[77],"vision-language":[78],"model":[79],"generate":[81],"captions,":[83],"which":[84],"are":[85],"then":[86],"semantically":[87],"enriched":[88],"using":[89],"LLM.":[91],"Experimental":[92],"evaluations":[93],"demonstrate":[94],"multi-stage":[97],"method":[98],"yields":[99],"substantial":[100],"improvements":[101],"in":[102,130],"accuracy":[104],"and":[105,123,128],"contextual":[106],"relevance.":[107],"While":[108],"initially":[110],"considered":[111],"adding":[112],"comparative":[113],"benchmarks,":[114],"opted":[116],"instead":[117],"focus":[119],"practical":[121],"deployment":[122],"evaluation,":[124],"referencing":[125],"related":[126],"datasets":[129],"literature":[132],"where":[133],"relevant.":[134]},"counts_by_year":[],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2026-01-21T00:00:00"}
