{"id":"https://openalex.org/W3159926276","doi":"https://doi.org/10.1109/waspaa52581.2021.9632782","title":"Identifying Actions for Sound Event Classification","display_name":"Identifying Actions for Sound Event Classification","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3159926276","doi":"https://doi.org/10.1109/waspaa52581.2021.9632782","mag":"3159926276"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa52581.2021.9632782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.12693","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073250019","display_name":"Benjamin Elizalde","orcid":"https://orcid.org/0000-0001-6461-5790"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Benjamin Elizalde","raw_affiliation_strings":["Carnegie Mellon University","Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075140860","display_name":"Radu Revutchi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Radu Revutchi","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110454109","display_name":"Samarjit Das","orcid":null},"institutions":[{"id":"https://openalex.org/I4210120115","display_name":"Robert Bosch (United States)","ror":"https://ror.org/02venad53","country_code":"US","type":"company","lineage":["https://openalex.org/I4210120115","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samarjit Das","raw_affiliation_strings":["Bosch Research Pittsburgh","Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bosch Research Pittsburgh","institution_ids":["https://openalex.org/I4210120115"]},{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113017615","display_name":"Bhiksha Raj","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bhiksha Raj","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028182466","display_name":"Ian Lane","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Lane","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008695422","display_name":"Laurie M. Heller","orcid":"https://orcid.org/0000-0002-4735-5701"},"institutions":[{"id":"https://openalex.org/I4210145457","display_name":"Robert Bosch (Taiwan)","ror":"https://ror.org/046as2g47","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210145457","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["TW","US"],"is_corresponding":false,"raw_author_name":"Laurie M. Heller","raw_affiliation_strings":["Carnegie Mellon University","Robert Bosch GmBH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Robert Bosch GmBH","institution_ids":["https://openalex.org/I4210145457"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5073250019"],"corresponding_institution_ids":["https://openalex.org/I4210164937","https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.1542,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.41570779,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"26","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.7707325220108032},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7150187492370605},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.705621600151062},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.6590192914009094},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5867536664009094},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.543838620185852},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5429072976112366},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5194424986839294},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43522322177886963},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4016655683517456},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10704895853996277},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.08794105052947998}],"concepts":[{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.7707325220108032},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7150187492370605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.705621600151062},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.6590192914009094},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5867536664009094},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.543838620185852},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5429072976112366},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5194424986839294},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43522322177886963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4016655683517456},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10704895853996277},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.08794105052947998},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/waspaa52581.2021.9632782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.12693","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.12693","pdf_url":"https://arxiv.org/pdf/2104.12693","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3159926276","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.12693.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.12693","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.12693","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.12693","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.12693","pdf_url":"https://arxiv.org/pdf/2104.12693","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321739","display_name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/059ex5q34"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3159926276.pdf","grobid_xml":"https://content.openalex.org/works/W3159926276.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W128363437","https://openalex.org/W1460854341","https://openalex.org/W1989892032","https://openalex.org/W2006292052","https://openalex.org/W2032337854","https://openalex.org/W2052666245","https://openalex.org/W2056478665","https://openalex.org/W2057338880","https://openalex.org/W2081913879","https://openalex.org/W2132052974","https://openalex.org/W2143483079","https://openalex.org/W2164941461","https://openalex.org/W2191779130","https://openalex.org/W2507728665","https://openalex.org/W2745357708","https://openalex.org/W2758074877","https://openalex.org/W2759822254","https://openalex.org/W2939574508","https://openalex.org/W2963485731","https://openalex.org/W2964345931","https://openalex.org/W3208972679","https://openalex.org/W6605139983","https://openalex.org/W6628533439","https://openalex.org/W6675354045","https://openalex.org/W6676396690","https://openalex.org/W6687152286","https://openalex.org/W6724851771","https://openalex.org/W7008761407"],"related_works":["https://openalex.org/W2807977755","https://openalex.org/W2930959424","https://openalex.org/W1852789990","https://openalex.org/W3137857706","https://openalex.org/W3115250497","https://openalex.org/W3099638501","https://openalex.org/W3159929162","https://openalex.org/W2979923291","https://openalex.org/W3040260790","https://openalex.org/W1965851527","https://openalex.org/W2809767522","https://openalex.org/W2951804919","https://openalex.org/W2991487804","https://openalex.org/W2913960490","https://openalex.org/W3159642309","https://openalex.org/W2167905777","https://openalex.org/W2549952655","https://openalex.org/W1543347227","https://openalex.org/W2963288440","https://openalex.org/W2912641852"],"abstract_inverted_index":{"In":[0,11],"Psychology,":[1],"actions":[2,28,59,75,104],"are":[3],"paramount":[4],"for":[5,53,99,111],"humans":[6],"to":[7,37,42,70,105,119],"identify":[8,73],"sound":[9,44,89,109],"events.":[10],"Machine":[12],"Learning":[13],"(ML),":[14],"action":[15],"recognition":[16],"achieves":[17],"high":[18],"accuracy;":[19],"however,":[20],"it":[21],"has":[22],"not":[23],"been":[24],"asked":[25],"whether":[26],"identifying":[27],"can":[29],"benefit":[30],"Sound":[31],"Event":[32],"Classification":[33],"(SEC),":[34],"as":[35],"opposed":[36],"mapping":[38],"the":[39,87,92,112,132,156,166],"audio":[40,101,138,145,148],"directly":[41],"a":[43,49,106],"event.":[45],"Therefore,":[46],"we":[47,67,159],"propose":[48],"new":[50],"Psychology-inspired":[51],"approach":[52],"SEC":[54,129],"that":[55,76],"includes":[56],"identification":[57],"of":[58,86,108,137,155,165],"via":[60],"human":[61],"listeners.":[62],"To":[63],"achieve":[64],"this":[65],"goal,":[66],"used":[68,118],"crowdsourcing":[69],"have":[71,83],"listeners":[72],"20":[74],"in":[77,80,91],"isolation":[78],"or":[79],"combination":[81],"may":[82],"produced":[84],"any":[85],"50":[88],"events":[90,110],"well-studied":[93],"dataset":[94],"ESC-50.":[95],"The":[96,115],"resulting":[97],"annotations":[98,116],"each":[100],"recording":[102],"relate":[103],"database":[107],"first":[113],"time.":[114],"were":[117],"create":[120],"semantic":[121],"representations":[122],"called":[123],"Action":[124],"Vectors":[125],"(AVs).":[126],"We":[127],"evaluated":[128],"by":[130],"comparing":[131],"AVs":[133,151],"with":[134],"two":[135],"types":[136],"features":[139,149],"-":[140],"log-mel":[141],"spectrograms":[142],"and":[143,150,162],"state-of-the-art":[144],"embeddings.":[146],"Because":[147],"capture":[152],"different":[153],"abstractions":[154],"acoustic":[157],"content,":[158],"combined":[160],"them":[161],"achieved":[163],"one":[164],"highest":[167],"reported":[168],"accuracies":[169],"(88%).":[170]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
