{"id":"https://openalex.org/W4403511756","doi":"https://doi.org/10.23919/spa61993.2024.10715604","title":"Siamese-Driven Optimization for Low-Resolution Image Latent Embedding in Image Captioning","display_name":"Siamese-Driven Optimization for Low-Resolution Image Latent Embedding in Image Captioning","publication_year":2024,"publication_date":"2024-09-25","ids":{"openalex":"https://openalex.org/W4403511756","doi":"https://doi.org/10.23919/spa61993.2024.10715604"},"language":"en","primary_location":{"id":"doi:10.23919/spa61993.2024.10715604","is_oa":false,"landing_page_url":"https://doi.org/10.23919/spa61993.2024.10715604","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Signal Processing: Algorithms, Architectures, Arrangements, and Applications (SPA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.08873","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114163349","display_name":"Jing Tan","orcid":"https://orcid.org/0009-0003-7249-0483"},"institutions":[{"id":"https://openalex.org/I931681460","display_name":"Universiti Tunku Abdul Rahman","ror":"https://ror.org/050pq4m56","country_code":"MY","type":"education","lineage":["https://openalex.org/I931681460"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Jing Jie Tan","raw_affiliation_strings":["Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia","institution_ids":["https://openalex.org/I931681460"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035263631","display_name":"Anissa Mokraoui","orcid":"https://orcid.org/0000-0001-6447-8722"},"institutions":[{"id":"https://openalex.org/I4210091279","display_name":"Universit\u00e9 Sorbonne Paris Nord","ror":"https://ror.org/0199hds37","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210091279"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Anissa Mokraoui","raw_affiliation_strings":["Universit&#x00E9; Sorbonne Paris Nord,Laboratoire de traitement et transport de l&#x2019;information,France"],"affiliations":[{"raw_affiliation_string":"Universit&#x00E9; Sorbonne Paris Nord,Laboratoire de traitement et transport de l&#x2019;information,France","institution_ids":["https://openalex.org/I4210091279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109030955","display_name":"Ban-Hoe Kwan","orcid":null},"institutions":[{"id":"https://openalex.org/I931681460","display_name":"Universiti Tunku Abdul Rahman","ror":"https://ror.org/050pq4m56","country_code":"MY","type":"education","lineage":["https://openalex.org/I931681460"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Ban-Hoe Kwan","raw_affiliation_strings":["Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia","institution_ids":["https://openalex.org/I931681460"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003031675","display_name":"Danny Wee-Kiat Ng","orcid":"https://orcid.org/0000-0001-9972-2676"},"institutions":[{"id":"https://openalex.org/I931681460","display_name":"Universiti Tunku Abdul Rahman","ror":"https://ror.org/050pq4m56","country_code":"MY","type":"education","lineage":["https://openalex.org/I931681460"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Danny Wee-Kiat Ng","raw_affiliation_strings":["Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia","institution_ids":["https://openalex.org/I931681460"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040863689","display_name":"Yan Chai Hum","orcid":"https://orcid.org/0000-0002-9657-8311"},"institutions":[{"id":"https://openalex.org/I931681460","display_name":"Universiti Tunku Abdul Rahman","ror":"https://ror.org/050pq4m56","country_code":"MY","type":"education","lineage":["https://openalex.org/I931681460"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Yan-Chai Hum","raw_affiliation_strings":["Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Tunku Abdul Rahman,Lee Kong Chian Faculty of Engineering and Science,Department of Mechatronics and Biomedical Engineering,Malaysia","institution_ids":["https://openalex.org/I931681460"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114163349"],"corresponding_institution_ids":["https://openalex.org/I931681460"],"apc_list":null,"apc_paid":null,"fwci":0.2501,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54180172,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"79","last_page":"84"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7960742712020874},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.7153973579406738},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6971208453178406},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6461039781570435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6254074573516846},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5640515685081482},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.42202189564704895}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7960742712020874},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.7153973579406738},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6971208453178406},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6461039781570435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6254074573516846},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5640515685081482},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.42202189564704895}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.23919/spa61993.2024.10715604","is_oa":false,"landing_page_url":"https://doi.org/10.23919/spa61993.2024.10715604","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Signal Processing: Algorithms, Architectures, Arrangements, and Applications (SPA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2512.08873","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.08873","pdf_url":"https://arxiv.org/pdf/2512.08873","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-04761587v1","is_oa":false,"landing_page_url":"https://hal.science/hal-04761587","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2024 Signal Processing: Algorithms, Architectures, Arrangements, and Applications (SPA), IEEE, Sep 2024, Poznan, France. pp.79-84, &#x27E8;10.23919/SPA61993.2024.10715604&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.08873","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.08873","pdf_url":"https://arxiv.org/pdf/2512.08873","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W68733909","https://openalex.org/W1905882502","https://openalex.org/W2140196014","https://openalex.org/W2963686907","https://openalex.org/W2969815465","https://openalex.org/W3015437081","https://openalex.org/W3023584964","https://openalex.org/W3080427797","https://openalex.org/W3115512796","https://openalex.org/W3126906961","https://openalex.org/W3163431160","https://openalex.org/W3193402893","https://openalex.org/W3202429845","https://openalex.org/W3216130706","https://openalex.org/W4220798687","https://openalex.org/W4321094913","https://openalex.org/W4385763884","https://openalex.org/W4386162736","https://openalex.org/W4396733011","https://openalex.org/W4399486525","https://openalex.org/W4399528683","https://openalex.org/W4402716330","https://openalex.org/W6769243733","https://openalex.org/W6803376173"],"related_works":["https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W4289422896","https://openalex.org/W1938708284","https://openalex.org/W4380190185","https://openalex.org/W2312145515"],"abstract_inverted_index":{"Image":[0,72,76],"captioning":[1],"is":[2,27],"essential":[3],"in":[4,24,60,75],"many":[5],"fields":[6],"including":[7],"assisting":[8],"visually":[9],"impaired":[10],"individuals,":[11],"improving":[12],"content":[13],"management":[14],"systems,":[15],"and":[16,55,102],"enhancing":[17,99],"human-computer":[18],"interaction.":[19],"However,":[20],"a":[21,80,91,112],"recent":[22],"challenge":[23],"this":[25],"domain":[26],"dealing":[28],"with":[29],"low-resolution":[30,86],"image":[31],"(LRI).":[32],"While":[33],"performance":[34],"can":[35],"be":[36],"improved":[37],"by":[38],"using":[39],"larger":[40],"models":[41,47],"like":[42],"transformers":[43],"for":[44,70,84,129],"encoding,":[45],"these":[46],"are":[48],"typically":[49],"heavyweight,":[50],"demanding":[51],"significant":[52],"computational":[53,119],"resources":[54],"memory,":[56],"leading":[57],"to":[58,95],"challenges":[59],"retraining.":[61],"To":[62],"address":[63],"this,":[64],"the":[65,100,105],"proposed":[66],"SOLI":[67,117],"(Siamese-Driven":[68],"Optimization":[69],"Low-Resolution":[71],"Latent":[73],"Embedding":[74],"Captioning)":[77],"approach":[78],"presents":[79],"solution":[81],"specifically":[82],"designed":[83],"lightweight,":[85],"images":[87],"captioning.":[88],"It":[89],"employs":[90],"Siamese":[92],"network":[93,115],"architecture":[94],"optimize":[96],"latent":[97],"embeddings,":[98],"efficiency":[101],"accuracy":[103],"of":[104],"image-to-text":[106],"translation":[107],"process.":[108],"By":[109],"focusing":[110],"on":[111,131],"dual-pathway":[113],"neural":[114],"structure,":[116],"minimizes":[118],"overhead":[120],"without":[121],"sacrificing":[122],"performance,":[123],"making":[124],"it":[125],"an":[126],"ideal":[127],"choice":[128],"training":[130],"resource-constrained":[132],"scenarios.":[133]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
