{"id":"https://openalex.org/W4413125622","doi":"https://doi.org/10.1109/tmm.2025.3595019","title":"Video Segmentation and Tokenization for Model-Based Video Scene Classification","display_name":"Video Segmentation and Tokenization for Model-Based Video Scene Classification","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4413125622","doi":"https://doi.org/10.1109/tmm.2025.3595019"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3595019","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3595019","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100434900","display_name":"Qing Wang","orcid":"https://orcid.org/0000-0003-3843-3920"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qing Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078239339","display_name":"Yajian Wang","orcid":"https://orcid.org/0000-0002-5255-4449"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yajian Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442521","display_name":"Hang Chen","orcid":"https://orcid.org/0000-0002-4355-6629"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Chen","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100639310","display_name":"Shuxian Wang","orcid":"https://orcid.org/0000-0001-8526-0845"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuxian Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066595711","display_name":"Jun Du","orcid":"https://orcid.org/0000-0002-2387-0389"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111937761","display_name":"Chin-Hui Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin-Hui Lee","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100434900"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23683473,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9690999984741211,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8759413957595825},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6416028738021851},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6216896176338196},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5524857044219971},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.47821351885795593},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.4188492000102997},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40641582012176514},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.33014804124832153},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32568639516830444}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8759413957595825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6416028738021851},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6216896176338196},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5524857044219971},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.47821351885795593},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.4188492000102997},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40641582012176514},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.33014804124832153},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32568639516830444}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3595019","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3595019","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4405003489","https://openalex.org/W4386014872","https://openalex.org/W1847536016","https://openalex.org/W4361193986","https://openalex.org/W3149094754","https://openalex.org/W2148703997","https://openalex.org/W4366851046","https://openalex.org/W3172681236","https://openalex.org/W2033371749","https://openalex.org/W1522196789"],"abstract_inverted_index":{"In":[0,185],"this":[1],"paper,":[2],"we":[3,67],"propose":[4],"a":[5,12,17,79,97,120,128,154],"novel":[6],"approach":[7,86],"for":[8,34],"segmenting":[9],"and":[10,27,54,115,137,161,166],"tokenizing":[11],"video":[13,35,80,116,129,183],"scene":[14,36,81,101,130,142,197],"recording":[15],"into":[16,96,126],"sequence":[18],"of":[19,113,122,163,209],"cascade":[20],"units,":[21],"known":[22],"as":[23,52],"visual":[24,30,46,123],"segment":[25,31,60,124],"units":[26,61,77,125],"modeled":[28],"with":[29],"models":[32,55],"(VSMs)":[33],"classification":[37,102,198],"(VSC).":[38],"Specifically,":[39],"the":[40,56,84,110,158,172,178,195,201,207],"proposed":[41,173,211],"VSM":[42,76,85],"framework":[43,174],"takes":[44],"deep":[45,98],"features":[47],"extracted":[48],"from":[49],"pre-trained":[50],"encoders":[51],"inputs":[53],"temporal":[57],"interactions":[58],"between":[59,75],"by":[62,157],"hidden":[63],"Markov":[64],"models.":[65],"Next,":[66],"use":[68],"unit":[69],"co-occurrence":[70],"statistics":[71],"to":[72,89,107,148],"introduce":[73],"relationships":[74],"within":[78,145],"recording.":[82],"Furthermore,":[83],"is":[87],"extended":[88],"an":[90],"acoustic-visual":[91],"variant,":[92],"subsequently":[93],"integrating":[94],"itself":[95],"learning-based":[99],"multi-modal":[100,188],"system.":[103],"This":[104],"combination":[105],"serves":[106],"further":[108],"exploit":[109],"complementary":[111],"nature":[112],"audio":[114],"data.":[117],"By":[118],"incorporating":[119],"set":[121],"modeling":[127],"class,":[131],"it":[132],"captures":[133],"both":[134],"inter-class":[135],"similarity":[136],"intra-class":[138],"diversity,":[139],"facilitating":[140],"improved":[141],"classification,":[143],"especially":[144],"categories":[146],"prone":[147],"confusion.":[149],"Extensive":[150],"experimental":[151],"results":[152],"on":[153],"benchmark":[155],"published":[156],"DCASE":[159,202],"(Detection":[160],"Classification":[162],"Acoustic":[164],"Scenes":[165],"Events)":[167],"2021":[168,203],"Challenge":[169],"show":[170],"that":[171],"can":[175],"effectively":[176],"handle":[177],"confusion":[179],"issue":[180],"among":[181],"similar":[182],"scenes.":[184],"addition,":[186],"our":[187,210],"integration":[189],"system":[190],"achieves":[191],"state-of-the-art":[192],"performance":[193],"in":[194,200],"audio-visual":[196],"task":[199],"Challenge,":[204],"thereby":[205],"demonstrating":[206],"effectiveness":[208],"approach.":[212]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
