{"id":"https://openalex.org/W2168010845","doi":"https://doi.org/10.1109/tmm.2010.2050650","title":"Blind Audiovisual Source Separation Based on Sparse Redundant Representations","display_name":"Blind Audiovisual Source Separation Based on Sparse Redundant Representations","publication_year":2010,"publication_date":"2010-05-26","ids":{"openalex":"https://openalex.org/W2168010845","doi":"https://doi.org/10.1109/tmm.2010.2050650","mag":"2168010845"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2010.2050650","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2010.2050650","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/inria-00541412","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052800796","display_name":"Anna Llagostera Casanovas","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Anna Llagostera Casanovas","raw_affiliation_strings":["Signal Processing Laboratory 2, EPF Lausanne, Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"Signal Processing Laboratory 2, EPF Lausanne, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077875525","display_name":"Gianluca Monaci","orcid":"https://orcid.org/0000-0001-5514-8457"},"institutions":[{"id":"https://openalex.org/I4210122849","display_name":"Philips (Netherlands)","ror":"https://ror.org/02p2bgp27","country_code":"NL","type":"company","lineage":["https://openalex.org/I4210122849"]},{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH","NL"],"is_corresponding":false,"raw_author_name":"Gianluca Monaci","raw_affiliation_strings":["Signal Processing Laboratory 2, EPF Lausanne, Switzerland","Video and Image Processing Group, Philips Research, Eindhoven, Netherlands"],"affiliations":[{"raw_affiliation_string":"Signal Processing Laboratory 2, EPF Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]},{"raw_affiliation_string":"Video and Image Processing Group, Philips Research, Eindhoven, Netherlands","institution_ids":["https://openalex.org/I4210122849"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028520858","display_name":"Pierre Vandergheynst","orcid":"https://orcid.org/0000-0002-9070-900X"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Pierre Vandergheynst","raw_affiliation_strings":["Signal Processing Laboratory 2, EPF Lausanne, Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"Signal Processing Laboratory 2, EPF Lausanne, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041230403","display_name":"R\u00e9mi Gribonval","orcid":"https://orcid.org/0000-0002-9450-8125"},"institutions":[{"id":"https://openalex.org/I4210133778","display_name":"Inria Rennes - Bretagne Atlantique Research Centre","ror":"https://ror.org/04040yw90","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210133778"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"R\u00e9mi Gribonval","raw_affiliation_strings":["Centre de Recherche, METISS Group, INRIA Rennes Bretagne Atlantique, Rennes, France"],"affiliations":[{"raw_affiliation_string":"Centre de Recherche, METISS Group, INRIA Rennes Bretagne Atlantique, Rennes, France","institution_ids":["https://openalex.org/I4210133778","https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5052800796"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":8.7927,"has_fulltext":false,"cited_by_count":78,"citation_normalized_percentile":{"value":0.98424997,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"12","issue":"5","first_page":"358","last_page":"371"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8496080636978149},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.6281336545944214},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.57230144739151},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5421491861343384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5342748761177063},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.5260928869247437},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5236425399780273},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.4569327235221863},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.45124882459640503},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4278680086135864},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4254993796348572},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.4192074239253998},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.37379902601242065},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.12708869576454163}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8496080636978149},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.6281336545944214},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.57230144739151},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5421491861343384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5342748761177063},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.5260928869247437},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5236425399780273},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.4569327235221863},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.45124882459640503},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4278680086135864},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4254993796348572},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.4192074239253998},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.37379902601242065},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.12708869576454163},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tmm.2010.2050650","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2010.2050650","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.167.2758","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.167.2758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://infoscience.epfl.ch/record/131148/files/double_1.pdf?version=4","raw_type":"text"},{"id":"pmh:oai:HAL:inria-00541412v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00541412","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Multimedia, 2010, 12 (5), pp.358 -- 371. &#x27E8;10.1109/TMM.2010.2050650&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:infoscience.epfl.ch:131148","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/131148","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:HAL:inria-00541412v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00541412","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Multimedia, 2010, 12 (5), pp.358 -- 371. &#x27E8;10.1109/TMM.2010.2050650&#x27E9;","raw_type":"Journal articles"},"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2920890412","display_name":null,"funder_award_id":"17884","funder_id":"https://openalex.org/F4320338370","funder_display_name":"FP7 Information and Communication Technologies"}],"funders":[{"id":"https://openalex.org/F4320338370","display_name":"FP7 Information and Communication Technologies","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W177229737","https://openalex.org/W186337737","https://openalex.org/W753830768","https://openalex.org/W1543025785","https://openalex.org/W1544215838","https://openalex.org/W1566152912","https://openalex.org/W1570733016","https://openalex.org/W1573024203","https://openalex.org/W1574316096","https://openalex.org/W1593513256","https://openalex.org/W1769974409","https://openalex.org/W1987906574","https://openalex.org/W2014621385","https://openalex.org/W2029192628","https://openalex.org/W2038010270","https://openalex.org/W2071925068","https://openalex.org/W2087774313","https://openalex.org/W2096391593","https://openalex.org/W2098717567","https://openalex.org/W2099904336","https://openalex.org/W2125045933","https://openalex.org/W2126779329","https://openalex.org/W2127851351","https://openalex.org/W2134026924","https://openalex.org/W2142352693","https://openalex.org/W2144282867","https://openalex.org/W2145379201","https://openalex.org/W2147301311","https://openalex.org/W2148659689","https://openalex.org/W2151693816","https://openalex.org/W2154151281","https://openalex.org/W2158911666","https://openalex.org/W2166847868","https://openalex.org/W2171819471","https://openalex.org/W2278884322","https://openalex.org/W2328570161","https://openalex.org/W4285719527","https://openalex.org/W6632614696","https://openalex.org/W6633969281","https://openalex.org/W6694617781"],"related_works":["https://openalex.org/W1509813908","https://openalex.org/W2031820693","https://openalex.org/W1910172735","https://openalex.org/W2107364365","https://openalex.org/W2118307209","https://openalex.org/W2113403277","https://openalex.org/W3112881379","https://openalex.org/W2002298560","https://openalex.org/W2019256813","https://openalex.org/W2158216966"],"abstract_inverted_index":{"In":[0,41],"this":[1,72],"paper,":[2],"we":[3],"propose":[4],"a":[5,19,32,35,42,85],"novel":[6],"method":[7,22,167],"which":[8,139],"is":[9,68,104,168],"able":[10,169],"to":[11,111,131,170],"detect":[12],"and":[13,34,46,65,79,109,153,159,175],"separate":[14,132],"audiovisual":[15,75,178],"sources":[16,76,123,141],"present":[17,177],"in":[18,63,81,136],"scene.":[20],"Our":[21],"exploits":[23],"the":[24,27,82,97,122,133,165],"correlation":[25],"between":[26,60],"video":[28,47,66,91],"signal":[29,135],"captured":[30],"with":[31,96],"camera":[33],"synchronously":[36],"recorded":[37],"one-microphone":[38],"audio":[39,45,64,134],"track.":[40],"first":[43],"stage,":[44],"modalities":[48,67],"are":[49,77,107,129,142],"decomposed":[50],"into":[51],"relevant":[52,61],"basic":[53],"structures":[54,92],"using":[55,84],"redundant":[56],"representations.":[57],"Next,":[58],"synchrony":[59],"events":[62],"quantified.":[69],"Based":[70],"on":[71,151],"co-occurrence":[73],"measure,":[74],"counted":[78],"located":[80],"image":[83],"robust":[86],"clustering":[87],"algorithm":[88],"that":[89,164],"groups":[90],"exhibiting":[93],"strong":[94],"correlations":[95],"audio.":[98],"Next":[99],"periods":[100,137],"where":[101],"each":[102],"source":[103],"active":[105],"alone":[106],"determined":[108],"used":[110,130],"build":[112],"<emphasis":[113],"emphasistype=\"boldital\"":[114],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[115],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">spectral</emphasis>":[116],"Gaussian":[117],"mixture":[118],"models":[119,128],"(GMMs)":[120],"characterizing":[121],"acoustic":[124],"behavior.":[125],"Finally,":[126],"these":[127],"during":[138],"several":[140],"mixed.":[143],"The":[144],"proposed":[145,166],"approach":[146],"has":[147],"been":[148],"extensively":[149],"tested":[150],"synthetic":[152],"natural":[154],"sequences":[155],"composed":[156],"of":[157],"speakers":[158],"music":[160],"instruments.":[161],"Results":[162],"show":[163],"successfully":[171],"detect,":[172],"localize,":[173],"separate,":[174],"reconstruct":[176],"sources.":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":9}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
