{"id":"https://openalex.org/W2996889020","doi":"https://doi.org/10.1109/taslp.2019.2957889","title":"Weakly Supervised Representation Learning for Audio-Visual Scene Analysis","display_name":"Weakly Supervised Representation Learning for Audio-Visual Scene Analysis","publication_year":2019,"publication_date":"2019-12-24","ids":{"openalex":"https://openalex.org/W2996889020","doi":"https://doi.org/10.1109/taslp.2019.2957889","mag":"2996889020"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2019.2957889","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2957889","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001145583","display_name":"Sanjeel Parekh","orcid":"https://orcid.org/0000-0002-2251-7436"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I2929663463","display_name":"Technicolor (Germany)","ror":"https://ror.org/00besvm65","country_code":"DE","type":"company","lineage":["https://openalex.org/I2929663463","https://openalex.org/I4210121266"]}],"countries":["DE","FR"],"is_corresponding":true,"raw_author_name":"Sanjeel Parekh","raw_affiliation_strings":["Telecom Paris, Paris, France","Technicolor R & I [Cesson S\u00e9vign\u00e9]"],"affiliations":[{"raw_affiliation_string":"Telecom Paris, Paris, France","institution_ids":["https://openalex.org/I12356871"]},{"raw_affiliation_string":"Technicolor R & I [Cesson S\u00e9vign\u00e9]","institution_ids":["https://openalex.org/I2929663463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060031161","display_name":"Slim Essid","orcid":"https://orcid.org/0000-0002-0028-327X"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I2802330208","display_name":"Statistics Belgium","ror":"https://ror.org/0246a9012","country_code":"BE","type":"government","lineage":["https://openalex.org/I2802330208","https://openalex.org/I4210095879"]}],"countries":["BE","FR"],"is_corresponding":false,"raw_author_name":"Slim Essid","raw_affiliation_strings":["Telecom Paris, Paris, France","D\u00e9partement Images, Donn\u00e9es, Signal","Signal, Statistique et Apprentissage"],"affiliations":[{"raw_affiliation_string":"Telecom Paris, Paris, France","institution_ids":["https://openalex.org/I12356871"]},{"raw_affiliation_string":"D\u00e9partement Images, Donn\u00e9es, Signal","institution_ids":[]},{"raw_affiliation_string":"Signal, Statistique et Apprentissage","institution_ids":["https://openalex.org/I2802330208"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073788938","display_name":"Alexey Ozerov","orcid":"https://orcid.org/0000-0003-4834-5166"},"institutions":[{"id":"https://openalex.org/I2929663463","display_name":"Technicolor (Germany)","ror":"https://ror.org/00besvm65","country_code":"DE","type":"company","lineage":["https://openalex.org/I2929663463","https://openalex.org/I4210121266"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexey Ozerov","raw_affiliation_strings":["InterDigital, Cesson Sevigne, France","Technicolor R & I [Cesson S\u00e9vign\u00e9]"],"affiliations":[{"raw_affiliation_string":"InterDigital, Cesson Sevigne, France","institution_ids":[]},{"raw_affiliation_string":"Technicolor R & I [Cesson S\u00e9vign\u00e9]","institution_ids":["https://openalex.org/I2929663463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041584595","display_name":"Ngoc Q. K. Duong","orcid":"https://orcid.org/0000-0002-7627-8194"},"institutions":[{"id":"https://openalex.org/I2929663463","display_name":"Technicolor (Germany)","ror":"https://ror.org/00besvm65","country_code":"DE","type":"company","lineage":["https://openalex.org/I2929663463","https://openalex.org/I4210121266"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ngoc Q. K. Duong","raw_affiliation_strings":["InterDigital, Cesson Sevigne, France","Technicolor R & I [Cesson S\u00e9vign\u00e9]"],"affiliations":[{"raw_affiliation_string":"InterDigital, Cesson Sevigne, France","institution_ids":[]},{"raw_affiliation_string":"Technicolor R & I [Cesson S\u00e9vign\u00e9]","institution_ids":["https://openalex.org/I2929663463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076170578","display_name":"Patrick P\u00e9rez","orcid":"https://orcid.org/0000-0002-8124-1206"},"institutions":[{"id":"https://openalex.org/I220619192","display_name":"Valeo (France)","ror":"https://ror.org/04ryqpf83","country_code":"FR","type":"company","lineage":["https://openalex.org/I220619192"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Patrick Perez","raw_affiliation_strings":["Valeo.ai, 75008 Paris, France","Valeo.ai"],"affiliations":[{"raw_affiliation_string":"Valeo.ai, 75008 Paris, France","institution_ids":["https://openalex.org/I220619192"]},{"raw_affiliation_string":"Valeo.ai","institution_ids":["https://openalex.org/I220619192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055423112","display_name":"Ga\u00ebl Richard","orcid":"https://orcid.org/0000-0002-4960-0010"},"institutions":[{"id":"https://openalex.org/I12356871","display_name":"T\u00e9l\u00e9com Paris","ror":"https://ror.org/01naq7912","country_code":"FR","type":"education","lineage":["https://openalex.org/I12356871","https://openalex.org/I205703379","https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I2802330208","display_name":"Statistics Belgium","ror":"https://ror.org/0246a9012","country_code":"BE","type":"government","lineage":["https://openalex.org/I2802330208","https://openalex.org/I4210095879"]}],"countries":["BE","FR"],"is_corresponding":false,"raw_author_name":"Gael Richard","raw_affiliation_strings":["Telecom Paris, Paris, France","Signal, Statistique et Apprentissage","D\u00e9partement Images, Donn\u00e9es, Signal"],"affiliations":[{"raw_affiliation_string":"Telecom Paris, Paris, France","institution_ids":["https://openalex.org/I12356871"]},{"raw_affiliation_string":"Signal, Statistique et Apprentissage","institution_ids":["https://openalex.org/I2802330208"]},{"raw_affiliation_string":"D\u00e9partement Images, Donn\u00e9es, Signal","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5001145583"],"corresponding_institution_ids":["https://openalex.org/I12356871","https://openalex.org/I2929663463"],"apc_list":null,"apc_paid":null,"fwci":2.3458,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.89681474,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"28","issue":null,"first_page":"416","last_page":"428"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7757585048675537},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6344183683395386},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6100279092788696},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.591815173625946},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5785441994667053},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5678497552871704},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5123807191848755},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5109649896621704},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4708858132362366},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.42053431272506714},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.41180577874183655},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37352585792541504},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35039860010147095},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32404571771621704},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.10082101821899414}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7757585048675537},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6344183683395386},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6100279092788696},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.591815173625946},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5785441994667053},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5678497552871704},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5123807191848755},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5109649896621704},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4708858132362366},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.42053431272506714},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.41180577874183655},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37352585792541504},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35039860010147095},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32404571771621704},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.10082101821899414},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2019.2957889","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2957889","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":97,"referenced_works":["https://openalex.org/W7746136","https://openalex.org/W639708223","https://openalex.org/W874179280","https://openalex.org/W1511170870","https://openalex.org/W1522301498","https://openalex.org/W1523385540","https://openalex.org/W1536680647","https://openalex.org/W1559046793","https://openalex.org/W1777628566","https://openalex.org/W1952794764","https://openalex.org/W1994488211","https://openalex.org/W2015433306","https://openalex.org/W2025954386","https://openalex.org/W2036931824","https://openalex.org/W2039844283","https://openalex.org/W2065274193","https://openalex.org/W2086384421","https://openalex.org/W2088049833","https://openalex.org/W2102605133","https://openalex.org/W2104446196","https://openalex.org/W2105582566","https://openalex.org/W2106841609","https://openalex.org/W2108598243","https://openalex.org/W2109255472","https://openalex.org/W2110119381","https://openalex.org/W2110226160","https://openalex.org/W2115447976","https://openalex.org/W2127851351","https://openalex.org/W2132984949","https://openalex.org/W2133324800","https://openalex.org/W2141355815","https://openalex.org/W2148154194","https://openalex.org/W2152617463","https://openalex.org/W2163605009","https://openalex.org/W2166010828","https://openalex.org/W2175354415","https://openalex.org/W2186827065","https://openalex.org/W2295107390","https://openalex.org/W2306289963","https://openalex.org/W2315268655","https://openalex.org/W2354870669","https://openalex.org/W2463565445","https://openalex.org/W2464894339","https://openalex.org/W2511428026","https://openalex.org/W2519284461","https://openalex.org/W2524365899","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2613718673","https://openalex.org/W2618530766","https://openalex.org/W2618602706","https://openalex.org/W2619697695","https://openalex.org/W2619947201","https://openalex.org/W2632052911","https://openalex.org/W2751116311","https://openalex.org/W2775505379","https://openalex.org/W2783473931","https://openalex.org/W2791956393","https://openalex.org/W2798158991","https://openalex.org/W2918555356","https://openalex.org/W2939574508","https://openalex.org/W2962865004","https://openalex.org/W2962910554","https://openalex.org/W2963099423","https://openalex.org/W2963115079","https://openalex.org/W2963150697","https://openalex.org/W2963218389","https://openalex.org/W2963603913","https://openalex.org/W2963610932","https://openalex.org/W2964109005","https://openalex.org/W2964121744","https://openalex.org/W2964345931","https://openalex.org/W2997685131","https://openalex.org/W3103314642","https://openalex.org/W3123940584","https://openalex.org/W4245923654","https://openalex.org/W4289665794","https://openalex.org/W6600313631","https://openalex.org/W6631190155","https://openalex.org/W6631216910","https://openalex.org/W6657008262","https://openalex.org/W6675803743","https://openalex.org/W6679390333","https://openalex.org/W6684369376","https://openalex.org/W6686576746","https://openalex.org/W6719057275","https://openalex.org/W6743647910","https://openalex.org/W6745928684","https://openalex.org/W6747331233","https://openalex.org/W6749158954","https://openalex.org/W6749633790","https://openalex.org/W6749767719","https://openalex.org/W6750599028","https://openalex.org/W6756219837","https://openalex.org/W6760029326","https://openalex.org/W6955071965","https://openalex.org/W7071105756"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W3119773509","https://openalex.org/W3208297503","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353"],"abstract_inverted_index":{"Audio-visual":[0],"(AV)":[1],"representation":[2],"learning":[3],"is":[4,52,104],"an":[5],"important":[6,99],"task":[7],"from":[8,109],"the":[9,15,75,122],"perspective":[10],"of":[11,101,125,131,141,164],"designing":[12],"machines":[13],"with":[14,138,152,178],"ability":[16,118],"to":[17,107,119,155],"understand":[18],"complex":[19],"events.":[20,112],"To":[21],"this":[22,51],"end,":[23],"we":[24,36],"propose":[25],"a":[26,128,139],"novel":[27,129],"multimodal":[28],"framework":[29],"that":[30,39,74],"instantiates":[31],"multiple":[32],"instance":[33],"learning.":[34],"Specifically,":[35],"develop":[37],"methods":[38],"identify":[40],"events":[41],"and":[42,94,168],"localize":[43],"corresponding":[44],"AV":[45],"cues":[46,183],"in":[47,70],"unconstrained":[48],"videos.":[49],"Importantly,":[50],"done":[53],"using":[54],"weak":[55],"labels":[56,61],"where":[57,181],"only":[58],"video-level":[59],"event":[60,89],"are":[62,78,143],"known":[63],"without":[64],"any":[65],"information":[66],"about":[67],"their":[68],"location":[69],"time.":[71],"We":[72,113],"show":[73],"learnt":[76],"representations":[77],"useful":[79],"for":[80],"performing":[81],"several":[82],"tasks":[83],"such":[84,159],"as":[85,160],"event/object":[86],"classification,":[87],"audio":[88,91,123,169],"detection,":[90],"source":[92,124],"separation":[93],"visual":[95,166],"object":[96,157],"localization.":[97],"An":[98],"feature":[100],"our":[102,116,172],"method":[103],"its":[105],"capacity":[106],"learn":[108],"unsynchronized":[110],"audio-visual":[111],"also":[114],"demonstrate":[115],"framework's":[117],"separate":[120],"out":[121],"interest":[126],"through":[127],"use":[130],"nonnegative":[132],"matrix":[133],"factorization.":[134],"State-of-the-art":[135],"classification":[136],"results,":[137],"F1-score":[140],"65.0,":[142],"achieved":[144],"on":[145],"DCASE":[146],"2017":[147],"smart":[148],"cars":[149],"challenge":[150],"data":[151],"promising":[153],"generalization":[154],"diverse":[156],"types":[158],"musical":[161],"instruments.":[162],"Visualizations":[163],"localized":[165],"regions":[167],"segments":[170],"substantiate":[171],"system's":[173],"efficacy,":[174],"especially":[175],"when":[176],"dealing":[177],"noisy":[179],"situations":[180],"modality-specific":[182],"appear":[184],"asynchronously.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
