{"id":"https://openalex.org/W4416035525","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1308","title":"Sali4Vid: Saliency-Aware Video Reweighting and Adaptive Caption Retrieval for Dense Video Captioning","display_name":"Sali4Vid: Saliency-Aware Video Reweighting and Adaptive Caption Retrieval for Dense Video Captioning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416035525","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1308"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1308","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1308","pdf_url":"https://aclanthology.org/2025.emnlp-main.1308.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.1308.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108943847","display_name":"MinJu Jeon","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"MinJu Jeon","raw_affiliation_strings":["Hanyang University , South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University , South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002252409","display_name":"Si-Woo Kim","orcid":"https://orcid.org/0000-0001-8706-6181"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Si-Woo Kim","raw_affiliation_strings":["Hanyang University , South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University , South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064180074","display_name":"Ye-Chan Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ye-Chan Kim","raw_affiliation_strings":["Hanyang University , South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University , South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088829266","display_name":"HyunGee Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"HyunGee Kim","raw_affiliation_strings":["Hanyang University , South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University , South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100393157","display_name":"Dong-Jin Kim","orcid":"https://orcid.org/0000-0001-7231-7494"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dong-Jin Kim","raw_affiliation_strings":["Hanyang University , South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University , South Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108943847"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3532588,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"25788","last_page":"25801"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5807999968528748,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5807999968528748,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.1906999945640564,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.08460000157356262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8277000188827515},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video retrieval","score":0.32760000228881836},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.2827000021934509},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2800999879837036},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.26010000705718994}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8277000188827515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7016000151634216},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5418999791145325},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4359000027179718},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2827000021934509},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2680000066757202},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.23880000412464142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.1308","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1308","pdf_url":"https://aclanthology.org/2025.emnlp-main.1308.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1308","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1308","pdf_url":"https://aclanthology.org/2025.emnlp-main.1308.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416035525.pdf","grobid_xml":"https://content.openalex.org/works/W4416035525.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dense":[0],"video":[1,9,36,45,106,112],"captioning":[2],"aims":[3],"to":[4,31,85],"temporally":[5],"localize":[6],"events":[7],"in":[8],"and":[10,39,74,89,98,108],"generate":[11],"captions":[12,42],"for":[13,110],"each":[14],"event.While":[15],"recent":[16],"works":[17],"propose":[18,53],"end-to-end":[19],"models,":[20],"they":[21],"suffer":[22],"from":[23,43],"two":[24],"limitations:":[25],"(1)":[26],"applying":[27],"timestamp":[28,67],"supervision":[29],"only":[30],"text":[32],"while":[33],"treating":[34],"all":[35],"frames":[37],"equally,":[38],"(2)":[40],"retrieving":[41],"fixedsize":[44],"chunks,":[46],"overlooking":[47],"scene":[48,87],"transitions.To":[49],"address":[50],"these,":[51],"we":[52],"Sali4Vid,":[54],"a":[55],"simple":[56],"yet":[57],"effective":[58],"saliency-aware":[59],"framework.We":[60],"introduce":[61],"Saliency-aware":[62],"Video":[63],"Reweighting,":[64],"which":[65,79],"converts":[66],"annotations":[68],"into":[69],"sigmoid-based":[70],"frame":[71,83],"importance":[72],"weights,":[73],"Semantic-based":[75],"Adaptive":[76],"Caption":[77],"Retrieval,":[78],"segments":[80],"videos":[81],"by":[82],"similarity":[84],"capture":[86],"transitions":[88],"improve":[90],"caption":[91],"retrieval.Sali4Vid":[92],"achieves":[93],"state-of-the-art":[94],"results":[95],"on":[96],"YouCook2":[97],"ViTT,":[99],"demonstrating":[100],"the":[101],"benefit":[102],"of":[103],"jointly":[104],"improving":[105],"weighting":[107],"retrieval":[109],"dense":[111],"captioning.":[113],"1":[114]},"counts_by_year":[],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-11-08T00:00:00"}
