{"id":"https://openalex.org/W2939663618","doi":"https://doi.org/10.1109/wacv45572.2020.9093307","title":"Audio-Visual Model Distillation Using Acoustic Images","display_name":"Audio-Visual Model Distillation Using Acoustic Images","publication_year":2020,"publication_date":"2020-03-01","ids":{"openalex":"https://openalex.org/W2939663618","doi":"https://doi.org/10.1109/wacv45572.2020.9093307","mag":"2939663618"},"language":"en","primary_location":{"id":"doi:10.1109/wacv45572.2020.9093307","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1904.07933","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000471667","display_name":"Andr\u00e9s F. P\u00e9rez","orcid":null},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Andres F. Perez","raw_affiliation_strings":["Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision"],"affiliations":[{"raw_affiliation_string":"Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","institution_ids":["https://openalex.org/I30771326"]},{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010361695","display_name":"Valentina Sanguineti","orcid":"https://orcid.org/0000-0001-7995-6205"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Valentina Sanguineti","raw_affiliation_strings":["Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision"],"affiliations":[{"raw_affiliation_string":"Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","institution_ids":["https://openalex.org/I30771326"]},{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017103754","display_name":"Pietro Morerio","orcid":"https://orcid.org/0000-0001-5259-1496"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Pietro Morerio","raw_affiliation_strings":["Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision"],"affiliations":[{"raw_affiliation_string":"Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","institution_ids":["https://openalex.org/I30771326"]},{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007242502","display_name":"Vittorio Murino","orcid":"https://orcid.org/0000-0002-8645-2328"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Vittorio Murino","raw_affiliation_strings":["Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision"],"affiliations":[{"raw_affiliation_string":"Pattern Analysis & Computer Vision, Istituto Italiano di Tecnologia","institution_ids":["https://openalex.org/I30771326"]},{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Pattern Analysis & Computer Vision","institution_ids":["https://openalex.org/I30771326"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000471667"],"corresponding_institution_ids":["https://openalex.org/I30771326"],"apc_list":null,"apc_paid":null,"fwci":0.4571,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.58711155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2843","last_page":"2852"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9739999771118164,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.761961817741394},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6836903095245361},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.5407716035842896},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5359863042831421},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5072628855705261},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.48904260993003845},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4529728293418884},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4411587417125702},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4386158883571625},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4287247657775879},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3233853280544281},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.15928059816360474},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.1333473026752472}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.761961817741394},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6836903095245361},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.5407716035842896},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5359863042831421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5072628855705261},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.48904260993003845},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4529728293418884},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4411587417125702},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4386158883571625},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4287247657775879},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3233853280544281},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.15928059816360474},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.1333473026752472},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/wacv45572.2020.9093307","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1904.07933","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.07933","pdf_url":"https://arxiv.org/pdf/1904.07933","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2939663618","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1904.07933.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1904.07933","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1904.07933","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1904.07933","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.07933","pdf_url":"https://arxiv.org/pdf/1904.07933","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2939663618.pdf"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W653761051","https://openalex.org/W1821462560","https://openalex.org/W1836465849","https://openalex.org/W1859622625","https://openalex.org/W1928278792","https://openalex.org/W2032337854","https://openalex.org/W2052666245","https://openalex.org/W2105582566","https://openalex.org/W2113896236","https://openalex.org/W2136132422","https://openalex.org/W2143613593","https://openalex.org/W2151096985","https://openalex.org/W2184188583","https://openalex.org/W2194775991","https://openalex.org/W2235381883","https://openalex.org/W2253986341","https://openalex.org/W2402324096","https://openalex.org/W2463402750","https://openalex.org/W2474574787","https://openalex.org/W2511428026","https://openalex.org/W2556930864","https://openalex.org/W2570915410","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2619947201","https://openalex.org/W2797090057","https://openalex.org/W2809440904","https://openalex.org/W2883935097","https://openalex.org/W2885782501","https://openalex.org/W2902135157","https://openalex.org/W2962756039","https://openalex.org/W2962865004","https://openalex.org/W2962946266","https://openalex.org/W2962960500","https://openalex.org/W2962969419","https://openalex.org/W2963082324","https://openalex.org/W2963115079","https://openalex.org/W2963155035","https://openalex.org/W2963680395","https://openalex.org/W2964052309","https://openalex.org/W2964109005","https://openalex.org/W2964121744","https://openalex.org/W2964139702","https://openalex.org/W2964345931","https://openalex.org/W2972425456","https://openalex.org/W3123318516","https://openalex.org/W4289665794","https://openalex.org/W6631190155","https://openalex.org/W6638523607","https://openalex.org/W6638667902","https://openalex.org/W6686207219","https://openalex.org/W6713224384","https://openalex.org/W6729831399","https://openalex.org/W6729977899","https://openalex.org/W6738806211","https://openalex.org/W6751130187","https://openalex.org/W6752215144","https://openalex.org/W6752516136","https://openalex.org/W6753511437","https://openalex.org/W6756442499","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W3104797222","https://openalex.org/W2997685131","https://openalex.org/W3095909497","https://openalex.org/W3155169551","https://openalex.org/W3001200001","https://openalex.org/W2795478783","https://openalex.org/W2968808972","https://openalex.org/W2908965201","https://openalex.org/W3034742263","https://openalex.org/W2796992393","https://openalex.org/W3157866890","https://openalex.org/W2996889020","https://openalex.org/W2890936049","https://openalex.org/W2062529116","https://openalex.org/W3198586259","https://openalex.org/W2483684792","https://openalex.org/W2785892019","https://openalex.org/W3033833751","https://openalex.org/W2945439681","https://openalex.org/W3130828980"],"abstract_inverted_index":{"In":[0,125],"this":[1,64,112],"paper,":[2],"we":[3,115,127],"investigate":[4],"how":[5],"to":[6],"learn":[7,29],"rich":[8],"and":[9,19,48,88,100,107,136,150],"robust":[10,56],"feature":[11],"representations":[12,31,52,146],"for":[13],"audio":[14,24,30,86,117,131,165],"classification":[15,47],"from":[16,32,133,159],"visual":[17,99,135],"data":[18,25,37],"acoustic":[20,90,95,101,137],"images,":[21,96],"a":[22,40,68,77,122],"novel":[23],"modality.":[26],"Former":[27],"models":[28,120,160],"raw":[33,85],"signals":[34],"or":[35],"spectral":[36],"acquired":[38,75],"by":[39,66,76],"single":[41],"microphone,":[42],"with":[43],"remarkable":[44],"results":[45],"in":[46,105,109,121],"retrieval.":[49],"However,":[50],"such":[51],"are":[53,103,147],"not":[54],"so":[55],"towards":[57],"variable":[58],"environmental":[59],"sound":[60],"conditions.":[61],"We":[62],"tackle":[63],"drawback":[65],"exploiting":[67],"new":[69],"multimodal":[70],"labeled":[71],"action":[72],"recognition":[73],"dataset":[74],"hybrid":[78],"audio-visual":[79],"sensor":[80],"that":[81,143],"provides":[82],"RGB":[83],"video,":[84],"signals,":[87],"spatialized":[89],"data,":[91],"also":[92],"known":[93],"as":[94],"where":[97],"the":[98,144,156],"images":[102],"aligned":[104],"space":[106],"synchronized":[108],"time.":[110],"Using":[111],"richer":[113],"information,":[114],"train":[116],"deep":[118],"learning":[119],"teacher-student":[123],"fashion.":[124],"particular,":[126],"distill":[128],"knowledge":[129],"into":[130],"networks":[132],"both":[134],"image":[138],"teachers.":[139],"Our":[140],"experiments":[141],"suggest":[142],"learned":[145,158],"more":[148],"powerful":[149],"have":[151],"better":[152],"generalization":[153],"capabilities":[154],"than":[155],"features":[157],"trained":[161],"using":[162],"just":[163],"single-microphone":[164],"data.":[166]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
