{"id":"https://openalex.org/W4403601779","doi":"https://doi.org/10.3390/e26100876","title":"Image Captioning Based on Semantic Scenes","display_name":"Image Captioning Based on Semantic Scenes","publication_year":2024,"publication_date":"2024-10-18","ids":{"openalex":"https://openalex.org/W4403601779","doi":"https://doi.org/10.3390/e26100876","pmid":"https://pubmed.ncbi.nlm.nih.gov/39451952"},"language":"en","primary_location":{"id":"doi:10.3390/e26100876","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e26100876","pdf_url":"https://www.mdpi.com/1099-4300/26/10/876/pdf?version=1729499930","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1099-4300/26/10/876/pdf?version=1729499930","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013029763","display_name":"Fengzhi Zhao","orcid":"https://orcid.org/0009-0008-9447-7825"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fengzhi Zhao","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun 130012, China","Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China"],"raw_orcid":"https://orcid.org/0009-0008-9447-7825","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034801889","display_name":"Zhezhou Yu","orcid":"https://orcid.org/0000-0002-8066-5875"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4400600921","display_name":"Guangdong Peizheng College","ror":"https://ror.org/05fsjkf12","country_code":null,"type":"education","lineage":["https://openalex.org/I4400600921"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhezhou Yu","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun 130012, China","Guang Dong Peizheng College, Guangzhou 510830, China","Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Guang Dong Peizheng College, Guangzhou 510830, China","institution_ids":["https://openalex.org/I4400600921"]},{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100453381","display_name":"Tao Wang","orcid":"https://orcid.org/0000-0001-5004-160X"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Wang","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun 130012, China","Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004465753","display_name":"LV Yi","orcid":"https://orcid.org/0000-0001-8755-3000"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Lv","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun 130012, China","Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University, Changchun 130012, China","institution_ids":["https://openalex.org/I194450716"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013029763"],"corresponding_institution_ids":["https://openalex.org/I194450716"],"apc_list":{"value":2000,"currency":"CHF","value_usd":2165},"apc_paid":{"value":2000,"currency":"CHF","value_usd":2165},"fwci":0.2317,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52912118,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"26","issue":"10","first_page":"876","last_page":"876"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9841879606246948},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8317451477050781},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6733834147453308},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5510818362236023},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5246919989585876},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4673972725868225},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.461683452129364},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.46129047870635986},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.44117897748947144},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.4355413615703583},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4106637239456177},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.37521225214004517},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3293691873550415}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9841879606246948},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8317451477050781},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6733834147453308},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5510818362236023},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5246919989585876},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4673972725868225},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.461683452129364},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.46129047870635986},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.44117897748947144},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.4355413615703583},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4106637239456177},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.37521225214004517},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3293691873550415},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/e26100876","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e26100876","pdf_url":"https://www.mdpi.com/1099-4300/26/10/876/pdf?version=1729499930","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},{"id":"pmid:39451952","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39451952","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy (Basel, Switzerland)","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11507651","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11507651","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11507651/pdf/entropy-26-00876.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Entropy (Basel)","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:e3720435c5de4f69ba4c7f29b8bd608f","is_oa":false,"landing_page_url":"https://doaj.org/article/e3720435c5de4f69ba4c7f29b8bd608f","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Entropy, Vol 26, Iss 10, p 876 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/e26100876","is_oa":true,"landing_page_url":"https://doi.org/10.3390/e26100876","pdf_url":"https://www.mdpi.com/1099-4300/26/10/876/pdf?version=1729499930","source":{"id":"https://openalex.org/S195231649","display_name":"Entropy","issn_l":"1099-4300","issn":["1099-4300"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Entropy","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3221940651","display_name":null,"funder_award_id":"2023KTSCX186","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3391714663","display_name":null,"funder_award_id":"U21A20390","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4715889894","display_name":null,"funder_award_id":"20240601039RC","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403601779.pdf","grobid_xml":"https://content.openalex.org/works/W4403601779.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1905882502","https://openalex.org/W1962622193","https://openalex.org/W2097606805","https://openalex.org/W2101105183","https://openalex.org/W2123301721","https://openalex.org/W2250378130","https://openalex.org/W2277195237","https://openalex.org/W2463955103","https://openalex.org/W2506483933","https://openalex.org/W2533800772","https://openalex.org/W2558834163","https://openalex.org/W2579549467","https://openalex.org/W2592929672","https://openalex.org/W2608231518","https://openalex.org/W2745461083","https://openalex.org/W2754927243","https://openalex.org/W2795151422","https://openalex.org/W2807697862","https://openalex.org/W2886970679","https://openalex.org/W2890531016","https://openalex.org/W2896348597","https://openalex.org/W2904993015","https://openalex.org/W2950626540","https://openalex.org/W2963084599","https://openalex.org/W2963536419","https://openalex.org/W3010277541","https://openalex.org/W3031696893","https://openalex.org/W3031914912","https://openalex.org/W3034655362","https://openalex.org/W3034984754","https://openalex.org/W3035017890","https://openalex.org/W3092462694","https://openalex.org/W3115720040","https://openalex.org/W3174377922","https://openalex.org/W3176587734","https://openalex.org/W3204358542","https://openalex.org/W3205071568","https://openalex.org/W3217347476","https://openalex.org/W4213453379","https://openalex.org/W4312924260","https://openalex.org/W4375953277","https://openalex.org/W4380200507","https://openalex.org/W4382677742","https://openalex.org/W4385245566","https://openalex.org/W4386750045","https://openalex.org/W4388579765","https://openalex.org/W4391320510","https://openalex.org/W4393392493","https://openalex.org/W6682631176","https://openalex.org/W6739901393","https://openalex.org/W6766818547","https://openalex.org/W6772224359","https://openalex.org/W6838701535"],"related_works":["https://openalex.org/W2172231696","https://openalex.org/W2089969684","https://openalex.org/W2002918846","https://openalex.org/W2503910294","https://openalex.org/W2034539438","https://openalex.org/W2384506582","https://openalex.org/W2019897126","https://openalex.org/W3009270862","https://openalex.org/W2120663665","https://openalex.org/W1787428848"],"abstract_inverted_index":{"With":[0],"the":[1,19,42,94,98,101,159,174,180,183,193,209,212,218,222,225,237,240,244,247,255,258,262,275],"development":[2],"of":[3,21,30,44,83,93,100,182,214,221,239,261],"artificial":[4],"intelligence":[5],"and":[6,24,60,96,110,138,150,176,195,224,232],"deep":[7],"learning":[8],"technologies,":[9],"image":[10,31,56,70,95,118,165,175,184,223],"captioning":[11,32,71,119],"has":[12,48],"become":[13],"an":[14,84],"important":[15],"research":[16],"direction":[17],"at":[18],"intersection":[20],"computer":[22],"vision":[23],"natural":[25,37],"language":[26,38],"processing.":[27],"The":[28,250,264],"purpose":[29],"is":[33],"to":[34,105,123,134,141,147,207,229],"generate":[35,75,148,230],"corresponding":[36],"descriptions":[39],"by":[40,77],"understanding":[41,99],"content":[43],"images.":[45,283],"This":[46],"technology":[47],"broad":[49],"application":[50],"prospects":[51],"in":[52,266],"fields":[53],"such":[54],"as":[55,208],"retrieval,":[57],"autonomous":[58],"driving,":[59],"visual":[61,129],"question":[62],"answering.":[63],"Currently,":[64],"many":[65],"researchers":[66],"have":[67],"proposed":[68],"region-based":[69],"methods.":[72],"These":[73],"methods":[74,120,145],"captions":[76,106,135,194],"extracting":[78],"features":[79,92],"from":[80,128,173,192],"different":[81],"regions":[82],"image.":[85],"However,":[86],"they":[87],"often":[88],"rely":[89],"on":[90,246],"local":[91],"overlook":[97],"overall":[102,259],"scene,":[103],"leading":[104],"that":[107,254,274],"lack":[108],"coherence":[109],"accuracy":[111],"when":[112,280],"dealing":[113],"with":[114,136],"complex":[115],"scenes.":[116],"Additionally,":[117],"are":[121],"unable":[122],"extract":[124],"complete":[125,231],"semantic":[126,190,197,227],"information":[127,198,220,228],"data,":[130],"which":[131,204],"may":[132],"lead":[133],"biases":[137],"deficiencies.":[139],"Due":[140],"these":[142],"reasons,":[143],"existing":[144],"struggle":[146],"comprehensive":[149],"accurate":[151,233],"captions.":[152,234,263],"To":[153,235],"fill":[154],"this":[155],"gap,":[156],"we":[157,205,242],"propose":[158],"Semantic":[160],"Scenes":[161],"Encoder":[162],"(SSE)":[163],"for":[164],"captioning.":[166],"It":[167],"first":[168],"extracts":[169,188],"a":[170,189,200],"scene":[171],"graph":[172,191],"integrates":[177],"it":[178,187,216],"into":[179],"encoding":[181],"information.":[185],"Then,":[186],"preserves":[196],"through":[199],"learnable":[201],"attention":[202],"mechanism,":[203],"refer":[206],"dictionary.":[210],"During":[211],"generation":[213],"captions,":[215],"combines":[217],"encoded":[219],"learned":[226],"verify":[236],"effectiveness":[238],"SSE,":[241],"tested":[243],"model":[245],"MSCOCO":[248],"dataset.":[249],"experimental":[251],"results":[252],"show":[253],"SSE":[256,276],"improves":[257],"quality":[260],"improvement":[265],"scores":[267],"across":[268],"multiple":[269],"evaluation":[270],"metrics":[271],"further":[272],"demonstrates":[273],"possesses":[277],"significant":[278],"advantages":[279],"processing":[281],"identical":[282]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
