{"id":"https://openalex.org/W2006928209","doi":"https://doi.org/10.1109/icassp.2014.6853691","title":"Segmentation of music video streams in music pieces through audio-visual analysis","display_name":"Segmentation of music video streams in music pieces through audio-visual analysis","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W2006928209","doi":"https://doi.org/10.1109/icassp.2014.6853691","mag":"2006928209"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2014.6853691","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01006099","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086215267","display_name":"Gabriel Sargent","orcid":"https://orcid.org/0000-0002-5995-8104"},"institutions":[{"id":"https://openalex.org/I15057530","display_name":"Universit\u00e9 de Bordeaux","ror":"https://ror.org/057qpr032","country_code":"FR","type":"education","lineage":["https://openalex.org/I15057530"]},{"id":"https://openalex.org/I4210142254","display_name":"Laboratoire Bordelais de Recherche en Informatique","ror":"https://ror.org/03adqg323","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I15057530","https://openalex.org/I4210142254","https://openalex.org/I4210159245","https://openalex.org/I4210160189"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Gabriel Sargent","raw_affiliation_strings":["Universit\u00e9 de Bordeaux, Talence, France","LaBRI, Univ. de Bordeaux, Talence, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I15057530"]},{"raw_affiliation_string":"LaBRI, Univ. de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I4210142254"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072575396","display_name":"Pierre Hanna","orcid":"https://orcid.org/0000-0002-7355-9580"},"institutions":[{"id":"https://openalex.org/I15057530","display_name":"Universit\u00e9 de Bordeaux","ror":"https://ror.org/057qpr032","country_code":"FR","type":"education","lineage":["https://openalex.org/I15057530"]},{"id":"https://openalex.org/I4210142254","display_name":"Laboratoire Bordelais de Recherche en Informatique","ror":"https://ror.org/03adqg323","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I15057530","https://openalex.org/I4210142254","https://openalex.org/I4210159245","https://openalex.org/I4210160189"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Pierre Hanna","raw_affiliation_strings":["Universit\u00e9 de Bordeaux, Talence, France","LaBRI, Univ. de Bordeaux, Talence, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I15057530"]},{"raw_affiliation_string":"LaBRI, Univ. de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I4210142254"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001643044","display_name":"Henri Nicolas","orcid":"https://orcid.org/0000-0003-2179-4965"},"institutions":[{"id":"https://openalex.org/I15057530","display_name":"Universit\u00e9 de Bordeaux","ror":"https://ror.org/057qpr032","country_code":"FR","type":"education","lineage":["https://openalex.org/I15057530"]},{"id":"https://openalex.org/I4210142254","display_name":"Laboratoire Bordelais de Recherche en Informatique","ror":"https://ror.org/03adqg323","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I15057530","https://openalex.org/I4210142254","https://openalex.org/I4210159245","https://openalex.org/I4210160189"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Henri Nicolas","raw_affiliation_strings":["Universit\u00e9 de Bordeaux, Talence, France","LaBRI, Univ. de Bordeaux, Talence, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I15057530"]},{"raw_affiliation_string":"LaBRI, Univ. de Bordeaux, Talence, France","institution_ids":["https://openalex.org/I4210142254"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.08333809,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"724","last_page":"728"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.820367157459259},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5648995637893677},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.545307457447052},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5185566544532776},{"id":"https://openalex.org/keywords/hue","display_name":"Hue","score":0.4994237422943115},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.43723559379577637},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4254070818424225},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.42266845703125},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.412450909614563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3891363739967346}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.820367157459259},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5648995637893677},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.545307457447052},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5185566544532776},{"id":"https://openalex.org/C126537357","wikidata":"https://www.wikidata.org/wiki/Q372948","display_name":"Hue","level":2,"score":0.4994237422943115},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.43723559379577637},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4254070818424225},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.42266845703125},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.412450909614563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3891363739967346},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp.2014.6853691","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01006099v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01006099","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP'14), 2014, Italy. 5 p","raw_type":"Conference papers"},{"id":"pmh:oai:HAL:hal-01011235v1","is_oa":false,"landing_page_url":"https://hal.science/hal-01011235","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICASSP, May 2014, Florence, Italy","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01006099v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01006099","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP'14), 2014, Italy. 5 p","raw_type":"Conference papers"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W15066456","https://openalex.org/W591222336","https://openalex.org/W1598357505","https://openalex.org/W2030564830","https://openalex.org/W2053101950","https://openalex.org/W2131569171","https://openalex.org/W2139107403","https://openalex.org/W2150231871","https://openalex.org/W2226264137","https://openalex.org/W2295300449","https://openalex.org/W2395069933","https://openalex.org/W2395752715","https://openalex.org/W2406901689","https://openalex.org/W4249294823","https://openalex.org/W6600593648","https://openalex.org/W6617581451","https://openalex.org/W6696886941","https://openalex.org/W6711916615","https://openalex.org/W6711953163","https://openalex.org/W6713620424"],"related_works":["https://openalex.org/W4320518079","https://openalex.org/W2039822179","https://openalex.org/W4386771591","https://openalex.org/W2972873516","https://openalex.org/W2405125474","https://openalex.org/W2074864407","https://openalex.org/W2364223252","https://openalex.org/W1989502759","https://openalex.org/W2363908918","https://openalex.org/W2153132133"],"abstract_inverted_index":{"Today,":[0],"technologies":[1],"for":[2],"information":[3],"storage":[4],"and":[5,10,25,55,122,149,175],"transmission":[6],"allow":[7],"the":[8,34,62,95,108,116,132,138,165,180],"creation":[9],"development":[11],"of":[12,15,36,40,57,97,111,119,140,143,155,167,183],"huge":[13],"databases":[14],"multimedia":[16,41],"content.":[17],"Tools":[18],"are":[19],"needed":[20],"to":[21,84,136,172],"facilitate":[22],"their":[23,120],"access":[24],"browsing.":[26],"In":[27,101],"this":[28,30,102,156],"context,":[29],"article":[31],"focuses":[32],"on":[33,115,131,158],"segmentation":[35],"a":[37,76,176],"particular":[38],"category":[39,50],"content,":[42],"audio-visual":[43,98],"musical":[44,66,99,121],"streams,":[45],"into":[46],"music":[47,58,112],"pieces.":[48],"This":[49],"includes":[51],"concert":[52,162],"audio-video":[53],"recordings,":[54],"sequences":[56],"videos":[59],"such":[60],"as":[61],"ones":[63],"found":[64],"in":[65,72,75,94,186],"TV":[67],"channels.":[68],"Current":[69],"approaches":[70],"consist":[71],"supervised":[73],"clustering":[74],"few":[77],"audio":[78],"classes":[79],"(music,":[80],"speech,":[81],"noise),":[82],"and,":[83],"our":[85],"knowledge,":[86],"no":[87],"consistent":[88],"evaluation":[89,154],"has":[90],"been":[91],"performed":[92],"yet":[93],"case":[96],"streams.":[100],"paper,":[103],"we":[104],"aim":[105],"at":[106],"estimating":[107],"temporal":[109],"boundaries":[110],"pieces":[113],"relying":[114],"assumed":[117],"homogeneity":[118],"visual":[123,184],"properties.":[124],"We":[125],"consider":[126],"an":[127],"unsupervised":[128],"approach":[129,157],"based":[130],"generalized":[133],"likelihood":[134],"ratio":[135],"evaluate":[137],"presence":[139],"statistical":[141],"breakdowns":[142],"MFCCs,":[144],"Chroma":[145],"vectors,":[146],"dominant":[147],"Hue":[148],"Lightness":[150],"over":[151],"time.":[152],"An":[153],"15":[159],"manually":[160],"annotated":[161],"streams":[163],"shows":[164],"advantage":[166],"combining":[168],"tonal":[169],"content":[170],"features":[171,185],"timbral":[173],"ones,":[174],"modest":[177],"impact":[178],"from":[179],"joint":[181],"use":[182],"boundary":[187],"estimation.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
