{"id":"https://openalex.org/W2807977755","doi":"https://doi.org/10.1145/3206025.3206067","title":"Class-aware Self-Attention for Audio Event Recognition","display_name":"Class-aware Self-Attention for Audio Event Recognition","publication_year":2018,"publication_date":"2018-06-05","ids":{"openalex":"https://openalex.org/W2807977755","doi":"https://doi.org/10.1145/3206025.3206067","mag":"2807977755"},"language":"en","primary_location":{"id":"doi:10.1145/3206025.3206067","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3206025.3206067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101776086","display_name":"Shizhe Chen","orcid":"https://orcid.org/0000-0002-7313-9703"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shizhe Chen","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086814847","display_name":"Jia Chen","orcid":"https://orcid.org/0000-0002-6350-6610"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia Chen","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009985839","display_name":"Qin Jin","orcid":"https://orcid.org/0000-0001-6486-6020"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Jin","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107836252","display_name":"Alexander G. Hauptmann","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Hauptmann","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101776086"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":1.6514,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.84930054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"28","last_page":"36"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8888797163963318},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.779098391532898},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.7060657739639282},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5764167308807373},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5098180174827576},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5044859647750854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4348784387111664},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4326738119125366},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.43081021308898926},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3351028263568878},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.18709063529968262},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.11945545673370361}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8888797163963318},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.779098391532898},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.7060657739639282},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5764167308807373},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5098180174827576},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5044859647750854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4348784387111664},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4326738119125366},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.43081021308898926},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3351028263568878},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.18709063529968262},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.11945545673370361},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3206025.3206067","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3206025.3206067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.699999988079071}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1535031652","https://openalex.org/W1586939924","https://openalex.org/W2052384514","https://openalex.org/W2064675550","https://openalex.org/W2077644276","https://openalex.org/W2133564696","https://openalex.org/W2147527908","https://openalex.org/W2150769028","https://openalex.org/W2295107390","https://openalex.org/W2354870669","https://openalex.org/W2396707013","https://openalex.org/W2407005679","https://openalex.org/W2412505962","https://openalex.org/W2526050071","https://openalex.org/W2544224704","https://openalex.org/W2548844710","https://openalex.org/W2593116425","https://openalex.org/W2595120084","https://openalex.org/W2618269622","https://openalex.org/W2625297138","https://openalex.org/W2775505379","https://openalex.org/W2787420051","https://openalex.org/W3103314642"],"related_works":["https://openalex.org/W2539919382","https://openalex.org/W22517275","https://openalex.org/W2042419400","https://openalex.org/W2122924390","https://openalex.org/W2064347532","https://openalex.org/W2131711534","https://openalex.org/W2589081144","https://openalex.org/W2171620991","https://openalex.org/W4387294562","https://openalex.org/W2293401412"],"abstract_inverted_index":{"Audio":[0],"event":[1,26,117,123,229,254],"recognition":[2,27,255],"(AER)":[3],"has":[4],"been":[5],"an":[6,129],"important":[7],"research":[8],"problem":[9],"with":[10,65,106,233,257,264,283],"a":[11,67,101,120,208,226,284],"wide":[12],"range":[13],"of":[14,47,59,74,128,246],"applications.":[15],"However,":[16],"it":[17,81],"is":[18,62,207,276],"very":[19],"challenging":[20],"to":[21,83,88,110,145,151,161,180,199,278],"develop":[22],"large":[23,72],"scale":[24],"audio":[25,39,48,60,75,90,116,122,131,228,253,281,296],"models.":[28],"On":[29,53],"the":[30,54,57,139,156,173,185,191,200,217,244,251,268,293],"one":[31],"hand,":[32,56],"usually":[33],"there":[34],"are":[35],"only":[36,44,66,124],"\"weak\"":[37],"labeled":[38],"training":[40,286],"data":[41,177],"available,":[42],"which":[43,189,249],"contains":[45],"labels":[46],"events":[49,61,91,198,263,282],"without":[50,291],"temporal":[51,136,148],"boundaries.":[52],"other":[55,196],"distribution":[58],"generally":[63],"long-tailed,":[64],"few":[68,285],"positive":[69],"samples":[70],"for":[71,93,115,166,262],"amounts":[73],"events.":[76,95,202,297],"These":[77],"two":[78],"issues":[79],"make":[80],"hard":[82],"learn":[84,162,279],"discriminative":[85,112],"acoustic":[86,238,259],"features":[87,114,260],"recognize":[89],"especially":[92,261],"long-tailed":[94,168],"In":[96,159],"this":[97],"paper,":[98],"we":[99,170],"propose":[100],"novel":[102],"class-aware":[103,141],"self-attention":[104,142],"mechanism":[105,206],"attention":[107,163,182,187,205],"factor":[108],"sharing":[109],"generate":[111],"clip-level":[113],"recognition.":[118],"Since":[119],"target":[121],"occurs":[125],"in":[126,184,216],"part":[127],"entire":[130],"clip":[132],"and":[133,150,176,211,236,289],"its":[134],"corresponding":[135],"interval":[137],"varies,":[138],"proposed":[140,186,204,274],"approach":[143],"learns":[144],"highlight":[146],"relevant":[147],"intervals":[149],"suppress":[152],"irrelevant":[153],"noises":[154],"at":[155],"same":[157],"time.":[158],"order":[160],"patterns":[164],"effectively":[165,288],"those":[167],"events,":[169],"combine":[171],"both":[172,234],"domain":[174],"knowledge":[175,193],"driven":[178],"strategies":[179],"share":[181],"factors":[183],"mechanism,":[188],"transfers":[190],"common":[192],"learned":[194,295],"from":[195],"similar":[197],"rare":[201],"The":[203,240],"pluggable":[209],"component":[210],"can":[212],"be":[213],"trained":[214],"end-to-end":[215],"overall":[218,252],"AER":[219],"model.":[220],"We":[221],"evaluate":[222],"our":[223,247,273],"model":[224,275],"on":[225],"large-scale":[227],"corpus":[230],"\"Audio":[231],"Set\"":[232],"short-term":[235],"long-term":[237],"features.":[239],"experimental":[241],"results":[242],"demonstrate":[243],"effectiveness":[245],"model,":[248],"improves":[250],"performance":[256],"different":[258],"low":[265],"resources.":[266],"Moreover,":[267],"experiments":[269],"also":[270],"show":[271],"that":[272],"able":[277],"new":[280],"examples":[287],"efficiently":[290],"disturbing":[292],"previously":[294]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
