{"id":"https://openalex.org/W4416676400","doi":"https://doi.org/10.1007/978-981-95-4960-3_17","title":"CLIP-AMR-GPT: Enhancing Image Captioning via Cross-Modal Semantics Fusion and GPT-Based Re-ranking","display_name":"CLIP-AMR-GPT: Enhancing Image Captioning via Cross-Modal Semantics Fusion and GPT-Based Re-ranking","publication_year":2025,"publication_date":"2025-11-26","ids":{"openalex":"https://openalex.org/W4416676400","doi":"https://doi.org/10.1007/978-981-95-4960-3_17"},"language":"en","primary_location":{"id":"doi:10.1007/978-981-95-4960-3_17","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-981-95-4960-3_17","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042785046","display_name":"Nguy\u1ec5n V\u0103n Th\u1ecbnh","orcid":"https://orcid.org/0000-0002-7543-5207"},"institutions":[{"id":"https://openalex.org/I70349855","display_name":"Vietnam Academy of Science and Technology","ror":"https://ror.org/02wsd5p50","country_code":"VN","type":"funder","lineage":["https://openalex.org/I70349855"]},{"id":"https://openalex.org/I4210104442","display_name":"Ho Chi Minh City University of Education","ror":"https://ror.org/01cs0jg44","country_code":"VN","type":"education","lineage":["https://openalex.org/I4210104442"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Nguyen Van Thinh","raw_affiliation_strings":["Faculty of Information Technology, Ho Chi Minh City University of Education (HCMUE), Ho Chi Minh City, Vietnam","Graduate University of Science and Technology, Vietnam Academy of Science and Technology (VAST), Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Ho Chi Minh City University of Education (HCMUE), Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I4210104442"]},{"raw_affiliation_string":"Graduate University of Science and Technology, Vietnam Academy of Science and Technology (VAST), Hanoi, Vietnam","institution_ids":["https://openalex.org/I70349855"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032730693","display_name":"Lang Tran","orcid":"https://orcid.org/0000-0002-8925-5549"},"institutions":[{"id":"https://openalex.org/I3129215250","display_name":"Ho Chi Minh City University of Foreign Languages-Informatics Technology","ror":"https://ror.org/02th1pb96","country_code":"VN","type":"education","lineage":["https://openalex.org/I3129215250"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Tran Van Lang","raw_affiliation_strings":["Journal Editorial Department, Ho Chi Minh City University of Foreign Languages and Information Technology (HUFLIT), Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Journal Editorial Department, Ho Chi Minh City University of Foreign Languages and Information Technology (HUFLIT), Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I3129215250"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101739586","display_name":"Nguy\u1ec5n Minh H\u1ea3i","orcid":"https://orcid.org/0000-0002-4329-7428"},"institutions":[{"id":"https://openalex.org/I4210104442","display_name":"Ho Chi Minh City University of Education","ror":"https://ror.org/01cs0jg44","country_code":"VN","type":"education","lineage":["https://openalex.org/I4210104442"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Nguyen Minh Hai","raw_affiliation_strings":["Faculty of Physics, Ho Chi Minh City University of Education (HCMUE), Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Faculty of Physics, Ho Chi Minh City University of Education (HCMUE), Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I4210104442"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042785046"],"corresponding_institution_ids":["https://openalex.org/I4210104442","https://openalex.org/I70349855"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78176611,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"210","last_page":"221"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9589999914169312},{"id":"https://openalex.org/keywords/fluency","display_name":"Fluency","score":0.5145000219345093},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5091999769210815},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4878999888896942},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4731999933719635},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4503999948501587},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4474000036716461},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4296000003814697},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.41130000352859497}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9589999914169312},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8971999883651733},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5723999738693237},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5508999824523926},{"id":"https://openalex.org/C2777413886","wikidata":"https://www.wikidata.org/wiki/Q3276013","display_name":"Fluency","level":2,"score":0.5145000219345093},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5091999769210815},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4878999888896942},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4731999933719635},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4503999948501587},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4474000036716461},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.40790000557899475},{"id":"https://openalex.org/C2778328480","wikidata":"https://www.wikidata.org/wiki/Q1639904","display_name":"Hybrid image","level":3,"score":0.38260000944137573},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.37940001487731934},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.35440000891685486},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.33730000257492065},{"id":"https://openalex.org/C2989087649","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Image synthesis","level":3,"score":0.33250001072883606},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.2849999964237213},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.26829999685287476},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.2648000121116638},{"id":"https://openalex.org/C110484373","wikidata":"https://www.wikidata.org/wiki/Q264398","display_name":"Adjacency list","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-981-95-4960-3_17","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-981-95-4960-3_17","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W3035284526","https://openalex.org/W3216130706","https://openalex.org/W4282968790","https://openalex.org/W4292828275","https://openalex.org/W4296197540","https://openalex.org/W4387373084","https://openalex.org/W4391791507","https://openalex.org/W4391966553","https://openalex.org/W4400810498","https://openalex.org/W4403451719","https://openalex.org/W4405178275","https://openalex.org/W4406416307","https://openalex.org/W4411176006","https://openalex.org/W4411798948","https://openalex.org/W4412414602"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2025-11-28T20:23:30.129472","created_date":"2025-11-27T00:00:00"}
