{"id":"https://openalex.org/W4402572275","doi":"https://doi.org/10.1145/3695878","title":"Diverse Image Captioning via Panoptic Segmentation and Sequential Conditional Variational Transformer","display_name":"Diverse Image Captioning via Panoptic Segmentation and Sequential Conditional Variational Transformer","publication_year":2024,"publication_date":"2024-09-17","ids":{"openalex":"https://openalex.org/W4402572275","doi":"https://doi.org/10.1145/3695878"},"language":"en","primary_location":{"id":"doi:10.1145/3695878","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3695878","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100339921","display_name":"Bing Liu","orcid":"https://orcid.org/0000-0002-2365-6606"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bing Liu","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107604028","display_name":"Jinfu Lu","orcid":"https://orcid.org/0009-0008-8022-2346"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinfu Lu","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100448074","display_name":"Mingming Liu","orcid":"https://orcid.org/0000-0002-5698-8308"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingming Liu","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458749","display_name":"Hao Liu","orcid":"https://orcid.org/0000-0001-6728-1773"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Liu","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025079159","display_name":"Yong Zhou","orcid":"https://orcid.org/0000-0001-6207-0299"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Zhou","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"last","author":{"id":null,"display_name":"Dongping Yang","orcid":"https://orcid.org/0009-0003-8048-5940"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongping Yang","raw_affiliation_strings":["School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou,\rJiangsu, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Science and Technology, China University of Mining and Technology, Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, Jiangsu, China","institution_ids":["https://openalex.org/I25757504"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100339921"],"corresponding_institution_ids":["https://openalex.org/I25757504"],"apc_list":null,"apc_paid":null,"fwci":0.245,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52311655,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"20","issue":"12","first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.84360671043396},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8020999431610107},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5843367576599121},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5619072318077087},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5496946573257446},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5398598909378052},{"id":"https://openalex.org/keywords/panopticon","display_name":"Panopticon","score":0.5125951170921326},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4822148084640503},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4423127770423889},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.424790620803833},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3669935464859009},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08503326773643494}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84360671043396},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8020999431610107},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5843367576599121},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5619072318077087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5496946573257446},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5398598909378052},{"id":"https://openalex.org/C138569888","wikidata":"https://www.wikidata.org/wiki/Q828310","display_name":"Panopticon","level":3,"score":0.5125951170921326},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4822148084640503},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4423127770423889},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.424790620803833},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3669935464859009},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08503326773643494},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695878","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3695878","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2574202113","display_name":null,"funder_award_id":"Nos. 62276266, 61801198","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1861492603","https://openalex.org/W1956340063","https://openalex.org/W2133459682","https://openalex.org/W2463955103","https://openalex.org/W2506483933","https://openalex.org/W2604178507","https://openalex.org/W2753215597","https://openalex.org/W2788277448","https://openalex.org/W2797029597","https://openalex.org/W2897964062","https://openalex.org/W2906314281","https://openalex.org/W2962968835","https://openalex.org/W2963084599","https://openalex.org/W2963467339","https://openalex.org/W2966162142","https://openalex.org/W2982553922","https://openalex.org/W2988793532","https://openalex.org/W3034655362","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W3047153790","https://openalex.org/W3097062010","https://openalex.org/W3106925514","https://openalex.org/W3138516171","https://openalex.org/W3167939936","https://openalex.org/W3174377922","https://openalex.org/W3175824375","https://openalex.org/W3189654197","https://openalex.org/W3201519611","https://openalex.org/W4221147537","https://openalex.org/W4226397058","https://openalex.org/W4285602612","https://openalex.org/W4312263794","https://openalex.org/W4312815172","https://openalex.org/W4312897353","https://openalex.org/W4313131769","https://openalex.org/W4375869407","https://openalex.org/W4385762291"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2921107741","https://openalex.org/W2197002326","https://openalex.org/W2494728058","https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4388184981","https://openalex.org/W4323777661"],"abstract_inverted_index":{"Recently,":[0],"transformer-based":[1],"image":[2,23,83,91,113,136],"captioning":[3,137],"models":[4],"have":[5],"achieved":[6],"significant":[7],"performance":[8,130],"improvement.":[9],"However,":[10],"due":[11],"to":[12,48,115],"the":[13,50,58,63,90,96,106,133],"limitations":[14],"of":[15,108],"region":[16,52,65,98],"visual":[17,34,59,66,92],"features":[18,35,99],"and":[19,25,36,100,128],"deterministic":[20],"projections":[21],"between":[22],"space":[24,114],"caption":[26,116],"space,":[27],"existing":[28],"methods":[29],"still":[30],"suffer":[31],"from":[32,112],"disentangled":[33],"rigid":[37],"sentences.":[38],"To":[39],"address":[40],"these":[41],"issues,":[42],"we":[43,69],"first":[44],"introduce":[45],"panoptic":[46,72],"segmentation":[47,51,73,97],"extract":[49],"features,":[53,103],"which":[54,85],"can":[55],"effectively":[56],"alleviate":[57],"confusion":[60],"caused":[61],"by":[62,94],"widely-adopted":[64],"features.":[67],"Then,":[68],"propose":[70],"a":[71],"based":[74],"sequential":[75],"conditional":[76],"variational":[77],"transformer":[78],"(PS-SCVT)":[79],"framework":[80],"for":[81],"diverse":[82,135],"captioning,":[84],"not":[86],"only":[87],"accurately":[88],"extracts":[89],"representations":[93],"fusing":[95],"object":[101],"detection":[102],"but":[104],"has":[105],"ability":[107],"learning":[109],"one-to-many":[110],"mappings":[111],"space.":[117],"The":[118],"experimental":[119],"results":[120],"demonstrate":[121],"that":[122],"our":[123],"approach":[124],"achieves":[125],"better":[126],"interpretability":[127],"generalization":[129],"compared":[131],"with":[132],"state-of-the-art":[134],"models.":[138]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
