{"id":"https://openalex.org/W2110794320","doi":"https://doi.org/10.1109/icassp.2006.1661202","title":"Comparing Audio and Video Segmentations for Music Videos Indexing","display_name":"Comparing Audio and Video Segmentations for Music Videos Indexing","publication_year":2006,"publication_date":"2006-08-02","ids":{"openalex":"https://openalex.org/W2110794320","doi":"https://doi.org/10.1109/icassp.2006.1661202","mag":"2110794320"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2006.1661202","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2006.1661202","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2006 IEEE International Conference on Acoustics Speed and Signal Processing Proceedings","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031087667","display_name":"Olivier Gillet","orcid":null},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210165912","display_name":"Laboratoire Traitement et Communication de l\u2019Information","ror":"https://ror.org/057er4c39","country_code":"FR","type":"facility","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4210165912"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"O. Gillet","raw_affiliation_strings":["GET/T\u00e9l\u00e9com Paris, CNRS LTCI, Paris, France","GET/T\u00e9l\u00e9com Paris CNRS LTCI, 37 rue Dareau. 75014 Paris, France. olivier.gillet@enst.fr"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"GET/T\u00e9l\u00e9com Paris, CNRS LTCI, Paris, France","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I1294671590","https://openalex.org/I12356871"]},{"raw_affiliation_string":"GET/T\u00e9l\u00e9com Paris CNRS LTCI, 37 rue Dareau. 75014 Paris, France. olivier.gillet@enst.fr","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I1294671590","https://openalex.org/I12356871"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113569245","display_name":"G. Richard","orcid":null},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210165912","display_name":"Laboratoire Traitement et Communication de l\u2019Information","ror":"https://ror.org/057er4c39","country_code":"FR","type":"facility","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4210165912"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"G. Richard","raw_affiliation_strings":["GET/T\u00e9l\u00e9com Paris, CNRS LTCI, Paris, France","GET-T\u00e9l\u00e9com Paris#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"GET/T\u00e9l\u00e9com Paris, CNRS LTCI, Paris, France","institution_ids":["https://openalex.org/I4210165912","https://openalex.org/I1294671590","https://openalex.org/I12356871"]},{"raw_affiliation_string":"GET-T\u00e9l\u00e9com Paris#TAB#","institution_ids":["https://openalex.org/I12356871"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9701,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.75198485,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"5","issue":null,"first_page":"V","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8370621204376221},{"id":"https://openalex.org/keywords/histogram","display_name":"Histogram","score":0.6344309449195862},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6212963461875916},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5922297835350037},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5837222933769226},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.5355396866798401},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5274115800857544},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5147497653961182},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.49709346890449524},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.48836037516593933},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.4599989354610443},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.45467668771743774},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3492814898490906},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.334047794342041},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.18771100044250488},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.12159579992294312},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11545455455780029}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8370621204376221},{"id":"https://openalex.org/C53533937","wikidata":"https://www.wikidata.org/wiki/Q185020","display_name":"Histogram","level":3,"score":0.6344309449195862},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6212963461875916},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5922297835350037},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5837222933769226},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.5355396866798401},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5274115800857544},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5147497653961182},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.49709346890449524},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.48836037516593933},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.4599989354610443},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.45467668771743774},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3492814898490906},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.334047794342041},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.18771100044250488},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.12159579992294312},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11545455455780029},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp.2006.1661202","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2006.1661202","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2006 IEEE International Conference on Acoustics Speed and Signal Processing Proceedings","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.459.2159","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.459.2159","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://perso.telecom-paristech.fr/~grichard/Publications/ICASSP06_Gillet.pdf","raw_type":"text"},{"id":"pmh:oai:HAL:hal-03153923v1","is_oa":false,"landing_page_url":"https://telecom-paris.hal.science/hal-03153923","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Acoustics Speed and Signal Processing, 2006, Toulouse, France. pp.V-21-V-24, &#x27E8;10.1109/ICASSP.2006.1661202&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5699999928474426}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W9205999","https://openalex.org/W56990849","https://openalex.org/W1499900670","https://openalex.org/W2100417949","https://openalex.org/W2104353650","https://openalex.org/W2105497548","https://openalex.org/W2106131881","https://openalex.org/W2114708606","https://openalex.org/W2118724348","https://openalex.org/W2120032316","https://openalex.org/W2124088136","https://openalex.org/W2128594317","https://openalex.org/W2132614156","https://openalex.org/W6600360952"],"related_works":["https://openalex.org/W2098934641","https://openalex.org/W1975359510","https://openalex.org/W2494533082","https://openalex.org/W4214771044","https://openalex.org/W4387698063","https://openalex.org/W4382560817","https://openalex.org/W3004352674","https://openalex.org/W3043119899","https://openalex.org/W3110605476","https://openalex.org/W1620668332"],"abstract_inverted_index":{"Music":[0],"videos":[1,128],"are":[2,18,40,107],"good":[3],"examples":[4],"of":[5,12,126],"multimedia":[6],"documents":[7],"in":[8,86],"which":[9],"the":[10,13,56,64,80,87,93,110],"structures":[11,29],"audio":[14,36,122],"and":[15,30,37,49,62,91],"video":[16,38,119],"streams":[17,39],"highly":[19],"correlated.":[20],"This":[21],"paper":[22],"presents":[23],"a":[24,68,98],"system":[25],"that":[26],"matches":[27],"these":[28],"extracts":[31],"audio-visual":[32],"correlation":[33,105],"measures.":[34],"The":[35],"independently":[41],"segmented":[42],"at":[43,55,63,79,92],"two-levels:":[44],"shots":[45],"(sections":[46],"for":[47],"audio)":[48],"events.":[50],"Audio":[51],"segmentation":[52,76],"is":[53,77],"performed":[54,78],"event":[57,81],"level":[58,66,82,95],"by":[59,67,83,96,129],"detecting":[60,84],"onsets,":[61],"section":[65],"novelty":[69],"detection":[70,102],"algorithm":[71],"identifying":[72],"instrumentation":[73],"changes.":[74],"Video":[75],"changes":[85],"motion":[88],"intensity":[89],"descriptor,":[90],"shot":[94,101],"using":[97],"classical":[99],"histogram-based":[100],"algorithm.":[103],"Audio-visual":[104],"measures":[106],"computed":[108],"on":[109],"extracted":[111],"structures.":[112],"Possible":[113],"applications":[114],"include":[115],"audio/video":[116],"stream":[117],"resynchronization,":[118],"retrieval":[120],"from":[121],"content,":[123],"or":[124],"classification":[125],"music":[127],"genre":[130]},"counts_by_year":[{"year":2013,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
