{"id":"https://openalex.org/W4408355560","doi":"https://doi.org/10.1109/icassp49660.2025.10889534","title":"MncCap: Mining Neural Composition for Zero-shot Image Captioning via Text-only Training","display_name":"MncCap: Mining Neural Composition for Zero-shot Image Captioning via Text-only Training","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355560","doi":"https://doi.org/10.1109/icassp49660.2025.10889534"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889534","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100638096","display_name":"Tongtong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tongtong Liu","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103247939","display_name":"Chen Yang","orcid":"https://orcid.org/0009-0004-2225-0833"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Yang","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100634295","display_name":"Guoqiang Chen","orcid":"https://orcid.org/0000-0002-6094-7883"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqiang Chen","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qinxu Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinxu Gao","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Enhua Song","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Enhua Song","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050190752","display_name":"Wen Hui Li","orcid":"https://orcid.org/0000-0002-1848-8855"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhui Li","raw_affiliation_strings":["Jilin University,College of Computer Science and Technology,Changchun,China"],"affiliations":[{"raw_affiliation_string":"Jilin University,College of Computer Science and Technology,Changchun,China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100638096"],"corresponding_institution_ids":["https://openalex.org/I194450716","https://openalex.org/I4210134929"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03340516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9246000051498413,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9698730707168579},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7451921701431274},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.7104235291481018},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6997301578521729},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.602982223033905},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5598228573799133},{"id":"https://openalex.org/keywords/composition","display_name":"Composition (language)","score":0.5373972654342651},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5197591185569763},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44131016731262207},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.42033305764198303},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38858792185783386},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37779664993286133},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.35614001750946045},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1139879822731018},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.061399638652801514},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.053108394145965576}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9698730707168579},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7451921701431274},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.7104235291481018},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6997301578521729},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.602982223033905},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5598228573799133},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.5373972654342651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5197591185569763},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44131016731262207},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.42033305764198303},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38858792185783386},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37779664993286133},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35614001750946045},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1139879822731018},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.061399638652801514},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.053108394145965576},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889534","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337495","display_name":"Technology Development","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2107627409","https://openalex.org/W2506483933","https://openalex.org/W2904565150","https://openalex.org/W2974979868","https://openalex.org/W2981165461","https://openalex.org/W3091588028","https://openalex.org/W3174377922","https://openalex.org/W4221147537","https://openalex.org/W4224933795","https://openalex.org/W4312922092","https://openalex.org/W4312938727","https://openalex.org/W4319777846","https://openalex.org/W4372337842","https://openalex.org/W4385567053","https://openalex.org/W4386076004","https://openalex.org/W4390873015","https://openalex.org/W4390889743","https://openalex.org/W4393154592","https://openalex.org/W4393156223","https://openalex.org/W6631190155","https://openalex.org/W6678262379","https://openalex.org/W6791353385","https://openalex.org/W6803567076","https://openalex.org/W6846271925","https://openalex.org/W6849799615","https://openalex.org/W6851149231"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Current":[0],"text-only":[1,73],"image":[2,15,69,139,154],"captioning":[3,16,70,155],"methods":[4],"leverage":[5],"the":[6,35,40,51,57,77,84,92,105,111,134,138,142,146],"shared":[7],"feature":[8,22],"space":[9],"of":[10,39,100,117,145],"CLIP":[11],"to":[12,47,75,95,132,140],"train":[13],"zero-shot":[14,68],"using":[17],"text":[18,93],"data":[19],"only,":[20],"leaving":[21],"associations":[23],"and":[24,50,86],"contextual":[25,101,143],"understanding":[26],"not":[27],"fully":[28],"explored.":[29],"Neurological":[30],"studies":[31],"have":[32],"revealed":[33],"that":[34,159],"anterior":[36],"temporal":[37],"lobes":[38],"brain":[41],"are":[42],"responsible":[43],"for":[44,67],"binding":[45],"attributes":[46],"specific":[48],"individuals":[49],"corresponding":[52],"collective":[53],"connections.":[54],"Inspired":[55],"by":[56,91],"above":[58],"studies,":[59],"we":[60,82,113],"propose":[61,114],"a":[62,97,115,123,127],"novel":[63],"Mining":[64],"Neural":[65],"Composition":[66],"(MncCap)":[71],"via":[72],"training":[74],"model":[76],"neural":[78],"composition.":[79],"During":[80,125],"training,":[81],"combine":[83],"global":[85],"local":[87],"fine-grained":[88],"features":[89,136],"provided":[90],"clues":[94],"achieve":[96],"stronger":[98],"ability":[99],"understanding.":[102],"To":[103],"express":[104],"relationship":[106],"from":[107],"discriminative":[108],"information":[109],"in":[110,137],"text,":[112],"strategy":[116],"converting":[118],"each":[119],"candidate":[120],"sentence":[121],"into":[122],"text-tree.":[124],"inference,":[126],"pre-trained":[128],"detector":[129],"is":[130],"used":[131],"obtain":[133],"ROI":[135],"improve":[141],"integrity":[144],"semantic":[147],"features.":[148],"Experimental":[149],"results":[150],"conducted":[151],"on":[152],"three":[153],"benchmark":[156],"datasets":[157],"show":[158],"our":[160],"framework":[161],"achieves":[162],"remarkable":[163],"performance":[164],"improvements.":[165]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
