{"id":"https://openalex.org/W4319777521","doi":"https://doi.org/10.1109/tai.2023.3243596","title":"Audio Representation Learning by Distilling Video as Privileged Information","display_name":"Audio Representation Learning by Distilling Video as Privileged Information","publication_year":2023,"publication_date":"2023-02-09","ids":{"openalex":"https://openalex.org/W4319777521","doi":"https://doi.org/10.1109/tai.2023.3243596"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2023.3243596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3243596","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062316928","display_name":"Amirhossein Hajavi","orcid":"https://orcid.org/0000-0003-1663-6647"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Amirhossein Hajavi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Queen&#x0027;s University at Kingston, Kingston, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Queen&#x0027;s University at Kingston, Kingston, ON, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039812985","display_name":"Ali Etemad","orcid":"https://orcid.org/0000-0001-7128-0220"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ali Etemad","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Queen&#x0027;s University at Kingston, Kingston, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Queen&#x0027;s University at Kingston, Kingston, ON, Canada","institution_ids":["https://openalex.org/I204722609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062316928"],"corresponding_institution_ids":["https://openalex.org/I204722609"],"apc_list":null,"apc_paid":null,"fwci":1.2226,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.77928013,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"5","issue":"1","first_page":"446","last_page":"456"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.605570375919342},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5790659189224243},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.43611517548561096},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4086466133594513},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.05256664752960205}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.605570375919342},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5790659189224243},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.43611517548561096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4086466133594513},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.05256664752960205},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2023.3243596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3243596","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8600000143051147,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G12658622","display_name":null,"funder_award_id":"CRDPJ 533919-18","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W2067562626","https://openalex.org/W2146334809","https://openalex.org/W2194775991","https://openalex.org/W2604863233","https://openalex.org/W2731516819","https://openalex.org/W2743289088","https://openalex.org/W2748787960","https://openalex.org/W2752782242","https://openalex.org/W2798599891","https://openalex.org/W2803193013","https://openalex.org/W2809440904","https://openalex.org/W2885895075","https://openalex.org/W2886300652","https://openalex.org/W2887783173","https://openalex.org/W2916104401","https://openalex.org/W2963368804","https://openalex.org/W2963801643","https://openalex.org/W2964054038","https://openalex.org/W2964111476","https://openalex.org/W2971047694","https://openalex.org/W2972627751","https://openalex.org/W2972811324","https://openalex.org/W2981087920","https://openalex.org/W2981694290","https://openalex.org/W2982157312","https://openalex.org/W2996906606","https://openalex.org/W3015734344","https://openalex.org/W3015764649","https://openalex.org/W3016092640","https://openalex.org/W3108075360","https://openalex.org/W3108124733","https://openalex.org/W3108549452","https://openalex.org/W3160554450","https://openalex.org/W3174102142","https://openalex.org/W3177378457","https://openalex.org/W3184648662","https://openalex.org/W3198298452","https://openalex.org/W3198791972","https://openalex.org/W4318148717","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6637551013","https://openalex.org/W6638523607","https://openalex.org/W6691815588","https://openalex.org/W6740493225","https://openalex.org/W6748082341","https://openalex.org/W6769906912","https://openalex.org/W6842607031"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Deep":[0],"audio":[1,53,294],"representation":[2,54,295],"learning":[3,55,79],"using":[4,56,80,197,256],"multimodal":[5,41],"audiovisual":[6,57,266],"data":[7,58,125,138],"often":[8],"leads":[9],"to":[10,15,34,111,181,238,243],"a":[11,48,166],"better":[12],"performance":[13,35],"compared":[14],"unimodal":[16],"approaches.":[17],"However,":[18],"in":[19,98,120,300],"real-world":[20],"scenarios,":[21],"both":[22,153],"modalities":[23],"are":[24,129,143,159],"not":[25],"always":[26],"available":[27],"at":[28,65],"the":[29,60,76,85,96,109,113,127,140,150,154,171,175,179,183,186,189,193,201,204,215,218,229,232,236,240,244,247,251,283,290],"time":[30],"of":[31,78,161,178,185,192,225,235,250,293],"inference,":[32],"leading":[33],"degradation":[36],"by":[37,95,108,174,228,281],"models":[38],"trained":[39,196,255],"for":[40,51,89,289],"inference.":[42,66],"In":[43,149,200],"this":[44,68],"article,":[45],"we":[46,70,102,278],"propose":[47],"novel":[49],"approach":[50],"deep":[52],"when":[59],"video":[61,284],"modality":[62,285],"is":[63,195,212,254],"absent":[64],"For":[67],"purpose,":[69],"adopt":[71],"teacher\u2013student":[72],"knowledge":[73],"distillation":[74],"under":[75],"framework":[77,262],"privileged":[81,287],"information":[82,288],"(LUPI).":[83],"While":[84],"previous":[86],"methods":[87],"proposed":[88,100],"LUPI":[90],"use":[91,103,170,222,313],"soft":[92],"labels":[93],"generated":[94],"teacher,":[97],"our":[99,118,261,297],"method,":[101],"<italic":[104],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[105],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">embeddings</i>":[106],"learned":[107],"teacher":[110,155,180,237],"train":[112,182,239],"student":[114,157,194,252],"network.":[115],"We":[116,169,221,259],"integrate":[117],"method":[119,298],"two":[121,223,264],"different":[122,265],"settings:":[123],"sequential":[124,202],"where":[126,139],"features":[128,142],"divided":[130],"into":[131],"multiple":[132],"segments":[133],"throughout":[134],"time,":[135],"and":[136,156,165,217,231,271],"nonsequential":[137,151,245],"entire":[141],"treated":[144],"as":[145,286,307,309],"one":[146],"whole":[147],"segment.":[148],"setting,":[152,203,246],"networks":[158,205],"comprised":[160],"an":[162,207],"encoder":[163,176,184,216,230],"component":[164,177,210,234],"task":[167,190,219,248],"header.":[168,220],"embeddings":[172,226],"produced":[173,227],"student,":[187],"while":[188],"header":[191,249],"ground-truth":[198,257],"labels.":[199,258],"have":[206],"additional":[208],"aggregation":[209,233],"that":[211,280,312],"placed":[213],"between":[214],"sets":[224],"student.":[241],"Similar":[242],"network":[253],"test":[260],"on":[263],"tasks,":[267],"namely,":[268],"speaker":[269],"recognition":[270,306],"speech":[272],"emotion":[273],"recognition.":[274],"Through":[275],"these":[276],"experiments,":[277],"show":[279],"treating":[282],"main":[291],"goal":[292],"learning,":[296],"results":[299],"considerable":[301],"improvements":[302],"over":[303],"sole":[304],"audio-based":[305],"well":[308],"prior":[310],"works":[311],"LUPI.":[314]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
