{"id":"https://openalex.org/W2771870130","doi":"https://doi.org/10.1109/mmsp.2017.8122267","title":"Robust video scene detection using multimodal fusion of optimally grouped features","display_name":"Robust video scene detection using multimodal fusion of optimally grouped features","publication_year":2017,"publication_date":"2017-10-01","ids":{"openalex":"https://openalex.org/W2771870130","doi":"https://doi.org/10.1109/mmsp.2017.8122267","mag":"2771870130"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp.2017.8122267","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2017.8122267","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015846077","display_name":"Daniel Rotman","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Daniel Rotman","raw_affiliation_strings":["IBM Research, Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Research, Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026891553","display_name":"Dror Porat","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Dror Porat","raw_affiliation_strings":["IBM Research, Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Research, Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047132423","display_name":"Gal Ashour","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Gal Ashour","raw_affiliation_strings":["IBM Research, Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Research, Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015846077"],"corresponding_institution_ids":["https://openalex.org/I4210167297"],"apc_list":null,"apc_paid":null,"fwci":0.4551,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.73643981,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8631918430328369},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6904162168502808},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.61076819896698},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5923792123794556},{"id":"https://openalex.org/keywords/video-browsing","display_name":"Video browsing","score":0.587873101234436},{"id":"https://openalex.org/keywords/video-post-processing","display_name":"Video post-processing","score":0.587531566619873},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5766362547874451},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.5198074579238892},{"id":"https://openalex.org/keywords/video-compression-picture-types","display_name":"Video compression picture types","score":0.5103074908256531},{"id":"https://openalex.org/keywords/multiview-video-coding","display_name":"Multiview Video Coding","score":0.477924644947052},{"id":"https://openalex.org/keywords/video-denoising","display_name":"Video denoising","score":0.474477082490921},{"id":"https://openalex.org/keywords/video-editing","display_name":"Video editing","score":0.4573823809623718},{"id":"https://openalex.org/keywords/smacker-video","display_name":"Smacker video","score":0.43126043677330017}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8631918430328369},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6904162168502808},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.61076819896698},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5923792123794556},{"id":"https://openalex.org/C2775856596","wikidata":"https://www.wikidata.org/wiki/Q25141683","display_name":"Video browsing","level":4,"score":0.587873101234436},{"id":"https://openalex.org/C117090137","wikidata":"https://www.wikidata.org/wiki/Q7927977","display_name":"Video post-processing","level":5,"score":0.587531566619873},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5766362547874451},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.5198074579238892},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.5103074908256531},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.477924644947052},{"id":"https://openalex.org/C30814859","wikidata":"https://www.wikidata.org/wiki/Q4119603","display_name":"Video denoising","level":5,"score":0.474477082490921},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.4573823809623718},{"id":"https://openalex.org/C108803254","wikidata":"https://www.wikidata.org/wiki/Q857512","display_name":"Smacker video","level":4,"score":0.43126043677330017}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp.2017.8122267","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2017.8122267","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1597863258","https://openalex.org/W1993518282","https://openalex.org/W2010070281","https://openalex.org/W2053611776","https://openalex.org/W2071949631","https://openalex.org/W2117223204","https://openalex.org/W2126780644","https://openalex.org/W2134362293","https://openalex.org/W2142189054","https://openalex.org/W2144180849","https://openalex.org/W2151333650","https://openalex.org/W2151617679","https://openalex.org/W2395845035","https://openalex.org/W2488838104","https://openalex.org/W2578494166","https://openalex.org/W2645519508","https://openalex.org/W4248215879","https://openalex.org/W6682298731","https://openalex.org/W6723180624"],"related_works":["https://openalex.org/W1574724839","https://openalex.org/W2106318444","https://openalex.org/W2140582509","https://openalex.org/W1993097576","https://openalex.org/W2783749862","https://openalex.org/W2153784897","https://openalex.org/W3034776505","https://openalex.org/W1498574451","https://openalex.org/W2036708442","https://openalex.org/W2965570359"],"abstract_inverted_index":{"Video":[0,204],"scene":[1,35,62,122,180,251],"detection,":[2,181],"the":[3,26,58,126,173,176,184,187,225,230,241,247],"task":[4],"of":[5,21,29,60,77,128,139,175,186,192,249],"temporally":[6,119],"dividing":[7,99],"a":[8,65,72,75,100,105,153,160,198],"video":[9,23,30,34,47,54,61,73,101,121,140,165,179,250],"into":[10,88,102],"its":[11],"semantic":[12],"sections,":[13],"is":[14],"an":[15,93],"important":[16,41],"process":[17],"for":[18,32,45,98,216,246],"effective":[19,46,94],"analysis":[20],"heterogeneous":[22],"content.":[24,141],"With":[25],"increased":[27],"amount":[28],"available":[31,215],"consumption,":[33],"detection":[36,63,123],"becomes":[37],"more":[38,40],"and":[39,50,53,95,124,134,218],"by":[42],"providing":[43],"means":[44],"summarization,":[48],"search":[49],"retrieval,":[51],"browsing,":[52],"understanding.":[55],"We":[56],"formulate":[57],"problem":[59,68,248],"as":[64],"generic":[66],"optimization":[67],"aimed":[69],"at":[70],"partitioning":[71],"given":[74],"set":[76],"features":[78],"derived":[79],"from":[80,229],"multiple":[81],"modalities.":[82],"By":[83],"optimally":[84],"grouping":[85],"consecutive":[86],"shots":[87],"scenes,":[89],"our":[90,147,170,202],"method":[91,171],"presents":[92],"efficient":[96],"solution":[97],"sections":[103],"using":[104,158],"unique":[106],"dynamic":[107],"programming":[108],"scheme.":[109],"Unlike":[110],"existing":[111],"methods,":[112],"it":[113,132],"allows":[114],"us":[115],"to":[116,136,157,201,234],"directly":[117],"obtain":[118],"consistent":[120],"has":[125],"advantage":[127],"being":[129],"parameter-free,":[130],"making":[131],"robust":[133],"applicable":[135],"various":[137],"types":[138],"Our":[142],"experimental":[143],"results":[144],"show":[145],"that":[146],"proposed":[148,188],"multimodal":[149],"approach":[150],"can":[151],"provide":[152,197],"significant":[154,199],"gain":[155],"compared":[156],"only":[159],"single":[161],"modality":[162],"(e.g.,":[163],"either":[164],"or":[166],"audio":[167],"alone).":[168],"Additionally,":[169],"outperforms":[172],"state":[174],"art":[177],"in":[178],"clearly":[182],"demonstrating":[183],"effectiveness":[185],"method.":[189],"As":[190],"part":[191],"this":[193,239],"work,":[194],"we":[195],"also":[196],"extension":[200],"Open":[203],"Scene":[205],"Detection":[206],"dataset":[207,240],"(OVSD),":[208],"which":[209,223],"comprises":[210],"open":[211],"licensed":[212],"videos":[213],"freely":[214],"academic":[217],"industrial":[219],"use.":[220],"This":[221],"extension,":[222],"increases":[224],"OVSD's":[226],"cumulative":[227],"duration":[228],"original":[231],"2.5":[232],"hours":[233],"over":[235],"17":[236],"hours,":[237],"makes":[238],"most":[242],"extensive":[243],"evaluation":[244],"tool":[245],"detection.":[252]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
