{"id":"https://openalex.org/W4415541215","doi":"https://doi.org/10.1145/3746027.3755644","title":"VSumMamba: Mamba Empowered Efficient Video Summarization with Multi-Scale Spatial-Temporal Modeling","display_name":"VSumMamba: Mamba Empowered Efficient Video Summarization with Multi-Scale Spatial-Temporal Modeling","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415541215","doi":"https://doi.org/10.1145/3746027.3755644"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755644","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083351999","display_name":"Yanli Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yamiao Ding","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102949546","display_name":"Tianrui Liu","orcid":"https://orcid.org/0000-0001-7926-3310"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Liu","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104310826","display_name":"Zhizhou Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizhou Lu","raw_affiliation_strings":["National University of Defence Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defence Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037501616","display_name":"Junjie Huang","orcid":"https://orcid.org/0000-0003-2986-4665"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun-Jie Huang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103149594","display_name":"Wentao Zhao","orcid":"https://orcid.org/0000-0002-2906-3238"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wentao Zhao","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727888","display_name":"Xinwang Liu","orcid":"https://orcid.org/0000-0001-9066-1475"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinwang Liu","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5083351999"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.428,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86828132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"6549","last_page":"6557"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9606000185012817,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.9495000243186951},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.675000011920929},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4185999929904938},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.3813999891281128},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.3467000126838684},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.32260000705718994}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.9495000243186951},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7874000072479248},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.675000011920929},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4293000102043152},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4185999929904938},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37130001187324524},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.32260000705718994},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3158000111579895},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.29820001125335693},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.28519999980926514},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28110000491142273},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755644","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1904325426","https://openalex.org/W1924343884","https://openalex.org/W2069007596","https://openalex.org/W2139009685","https://openalex.org/W2529272619","https://openalex.org/W2737677090","https://openalex.org/W2766630207","https://openalex.org/W2781922022","https://openalex.org/W2798970487","https://openalex.org/W2883872876","https://openalex.org/W2903758693","https://openalex.org/W2906430987","https://openalex.org/W2963919999","https://openalex.org/W2964158702","https://openalex.org/W2964167369","https://openalex.org/W2967219836","https://openalex.org/W2987654501","https://openalex.org/W3025569967","https://openalex.org/W3027431227","https://openalex.org/W3087792975","https://openalex.org/W3090254005","https://openalex.org/W3107128832","https://openalex.org/W3174989968","https://openalex.org/W3196286078","https://openalex.org/W4225769600","https://openalex.org/W4361249544"],"related_works":[],"abstract_inverted_index":{"The":[0,53],"exponential":[1],"growth":[2],"of":[3,42],"video":[4,27,96],"content":[5],"necessitates":[6],"efficient":[7],"summarization":[8,28,114],"techniques":[9],"that":[10,30],"balance":[11,110],"local":[12],"redundancy":[13],"reduction":[14],"and":[15,78,113,127,132],"global":[16],"dependency":[17],"modeling.":[18],"In":[19],"this":[20],"work,":[21],"we":[22],"introduce":[23],"VSumMamba,":[24],"an":[25],"innovative":[26],"approach":[29],"leverages":[31],"Selective":[32],"State":[33],"Space":[34],"Models":[35],"to":[36,92,109,141],"address":[37],"the":[38,107],"quadratic":[39],"complexity":[40],"limitations":[41],"Transformer":[43],"based":[44],"approaches":[45],"meanwhile":[46],"surpassing":[47],"CNNs'":[48],"restricted":[49],"long-range":[50],"modeling":[51,103],"capabilities.":[52],"proposed":[54],"framework":[55],"comprises":[56],"three":[57,99],"core":[58],"components:":[59],"1)":[60],"a":[61,65,80],"Multi-Scale":[62],"Aggregator,":[63],"2)":[64],"Cascaded":[66],"Temporal":[67],"Modeling":[68,83],"Module":[69,84],"with":[70],"bi-directional":[71],"Mamba":[72,87],"blocks":[73],"for":[74],"temporal":[75],"representation":[76],"enhancement,":[77],"3)":[79],"Parallel":[81],"Spatial":[82],"employing":[85],"spatial":[86],"blocks,":[88],"operating":[89],"in":[90],"concert":[91],"effectively":[93],"refine":[94],"spatiotemporal":[95],"representations.":[97],"Through":[98],"specialized":[100],"multi-scale":[101],"spatial-temporal":[102],"schemes,":[104],"VSumMamba":[105],"demonstrate":[106,121],"ability":[108],"computational":[111,138],"efficiency":[112],"performance.":[115],"Comprehensive":[116],"evaluations":[117],"on":[118,130],"benchmarks":[119],"datasets":[120],"VSumMamba's":[122],"superior":[123],"performance,":[124],"achieving":[125],"67.5%":[126],"56.0%":[128],"F1-scores":[129],"TVSum":[131],"SumMe":[133],"respectively,":[134],"while":[135],"maintaining":[136],"lower":[137],"cost":[139],"compared":[140],"existing":[142],"state-of-the-art":[143],"methods.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
