{"id":"https://openalex.org/W4402963285","doi":"https://doi.org/10.1145/3664647.3681358","title":"HICEScore: A Hierarchical Metric for Image Captioning Evaluation","display_name":"HICEScore: A Hierarchical Metric for Image Captioning Evaluation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4402963285","doi":"https://doi.org/10.1145/3664647.3681358"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681358","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.18589","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037329824","display_name":"Zequn Zeng","orcid":"https://orcid.org/0009-0006-3410-8513"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zequn Zeng","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, Shaanxi, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0009-0006-3410-8513","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062432846","display_name":"Jianqiao Sun","orcid":"https://orcid.org/0000-0002-8193-7940"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiao Sun","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0000-0002-8193-7940","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396885","display_name":"Hao Zhang","orcid":"https://orcid.org/0000-0002-2928-2692"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhang","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0000-0002-2928-2692","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047153234","display_name":"Tianyang Wen","orcid":"https://orcid.org/0009-0005-7911-7694"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tiansheng Wen","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0009-0005-7911-7694","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110984362","display_name":"Yudi Su","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yudi Su","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian university, Xi'an, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0009-0006-4938-0683","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian university, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yan Xie","orcid":"https://orcid.org/0009-0008-2929-8636"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Xie","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China"],"raw_orcid":"https://orcid.org/0009-0008-2929-8636","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068379071","display_name":"Zhengjue Wang","orcid":"https://orcid.org/0000-0002-1846-495X"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengjue Wang","raw_affiliation_strings":["State Key Laboratory of Integrated Service Networks, Xidian University, Xi'an, China","National Key Laboratory of Radar Signal Processing, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0000-0002-1846-495X","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100427253","display_name":"Bo Chen","orcid":"https://orcid.org/0000-0001-5151-9388"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Chen","raw_affiliation_strings":["National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China"],"raw_orcid":"https://orcid.org/0000-0001-5151-9388","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Radar Signal Processing, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.431,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62514913,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"866","last_page":"875"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9431845545768738},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8182981014251709},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6606801152229309},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5600994229316711},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5541502833366394},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.46392449736595154},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4399261474609375},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43658357858657837},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.41348201036453247},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3491809368133545}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9431845545768738},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8182981014251709},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6606801152229309},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5600994229316711},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5541502833366394},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.46392449736595154},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4399261474609375},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43658357858657837},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.41348201036453247},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3491809368133545},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3664647.3681358","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2407.18589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18589","pdf_url":"https://arxiv.org/pdf/2407.18589","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.18589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18589","pdf_url":"https://arxiv.org/pdf/2407.18589","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1425959807","display_name":null,"funder_award_id":"QTZX24003","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G1474490597","display_name":null,"funder_award_id":"111 Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4245713565","display_name":null,"funder_award_id":"QTZX22160","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5631434781","display_name":null,"funder_award_id":"B18039","funder_id":"https://openalex.org/F4320327912","funder_display_name":"Higher Education Discipline Innovation Project"},{"id":"https://openalex.org/G5946977168","display_name":null,"funder_award_id":"B18039","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G7843506534","display_name":null,"funder_award_id":"U21B2006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8879567429","display_name":null,"funder_award_id":"B18039","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327912","display_name":"Higher Education Discipline Innovation Project","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402963285.pdf","grobid_xml":"https://content.openalex.org/works/W4402963285.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2035866663","https://openalex.org/W2128856065","https://openalex.org/W2139501017","https://openalex.org/W2142112143","https://openalex.org/W2185175083","https://openalex.org/W2302086703","https://openalex.org/W2339652278","https://openalex.org/W2600463316","https://openalex.org/W2745461083","https://openalex.org/W2962935746","https://openalex.org/W2963954913","https://openalex.org/W2964042428","https://openalex.org/W2970858040","https://openalex.org/W3000226596","https://openalex.org/W3034417909","https://openalex.org/W3034655362","https://openalex.org/W3098358988","https://openalex.org/W3173220247","https://openalex.org/W3174151851","https://openalex.org/W3214192224","https://openalex.org/W4221147537","https://openalex.org/W4281735693","https://openalex.org/W4281873319","https://openalex.org/W4312563428","https://openalex.org/W4379141755","https://openalex.org/W4386075661","https://openalex.org/W4386076674","https://openalex.org/W4390874575","https://openalex.org/W4391109864","https://openalex.org/W4402703029"],"related_works":["https://openalex.org/W4388002133","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W4289422896","https://openalex.org/W1938708284","https://openalex.org/W4380190185"],"abstract_inverted_index":{"Image":[0,128],"captioning":[1,124],"evaluation":[2,94],"metrics":[3,11,48,177,184],"can":[4],"be":[5],"divided":[6],"into":[7],"two":[8],"categories,":[9],"reference-based":[10,16,183],"and":[12,77,104,138,180,182,187],"reference-free":[13,47,120,158,176],"metrics.":[14,159],"However,":[15],"approaches":[17],"may":[18],"struggle":[19],"to":[20,36,80,90],"evaluate":[21],"descriptive":[22],"captions":[23,202],"with":[24],"abundant":[25],"visual":[26,82,106,136],"details":[27],"produced":[28],"by":[29,61],"advanced":[30],"multimodal":[31],"large":[32],"language":[33],"models,":[34],"due":[35],"their":[37,62,85],"heavy":[38],"reliance":[39],"on":[40,171,200],"limited":[41],"human-annotated":[42],"references.":[43],"In":[44],"contrast,":[45],"previous":[46],"have":[49,69,109],"been":[50,111],"proven":[51],"effective":[52],"via":[53],"CLIP":[54],"cross-modality":[55],"similarity.":[56],"Nonetheless,":[57],"CLIP-based":[58],"metrics,":[59],"constrained":[60],"solution":[63],"of":[64,101,152,156,198],"global":[65],"image-text":[66],"compatibility,":[67],"often":[68],"a":[70,118],"deficiency":[71],"in":[72],"detecting":[73,134],"local":[74,135],"textual":[75,139],"hallucinations":[76],"are":[78,88],"insensitive":[79],"small":[81],"objects.":[83],"Besides,":[84],"single-scale":[86,154],"designs":[87],"unable":[89],"provide":[91],"an":[92,143],"interpretable":[93,144,205],"process":[95,197],"such":[96],"as":[97],"pinpointing":[98],"the":[99,150,153,168,195],"position":[100],"caption":[102],"mistakes":[103],"identifying":[105],"regions":[107,137],"that":[108,163,194],"not":[110],"described.":[112],"To":[113],"move":[114],"forward,":[115],"we":[116],"propose":[117],"novel":[119],"metric":[121,166],"for":[122],"image":[123],"evaluation,":[125],"dubbed":[126],"Hierarchical":[127],"Captioning":[129],"Evaluation":[130],"Score":[131],"(HICE-S).":[132],"By":[133],"phrases,":[140],"HICE-S":[141,199],"builds":[142],"hierarchical":[145],"scoring":[146],"mechanism,":[147],"breaking":[148],"through":[149],"barriers":[151],"structure":[155],"existing":[157,175],"Comprehensive":[160],"experiments":[161],"indicate":[162],"our":[164],"proposed":[165],"achieves":[167],"SOTA":[169],"performance":[170],"several":[172,190],"benchmarks,":[173],"outperforming":[174],"like":[178,185],"CLIP-S":[179],"PAC-S,":[181],"METEOR":[186],"CIDEr.":[188],"Moreover,":[189],"case":[191],"studies":[192],"reveal":[193],"assessment":[196],"detailed":[201],"closely":[203],"resembles":[204],"human":[206],"judgments.Our":[207],"code":[208],"is":[209],"available":[210],"at":[211],"https://github.com/joeyz0z/HICE.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
