{"id":"https://openalex.org/W2130640900","doi":"https://doi.org/10.1109/tasl.2009.2017438","title":"Environmental Sound Recognition With Time\u2013Frequency Audio Features","display_name":"Environmental Sound Recognition With Time\u2013Frequency Audio Features","publication_year":2009,"publication_date":"2009-06-25","ids":{"openalex":"https://openalex.org/W2130640900","doi":"https://doi.org/10.1109/tasl.2009.2017438","mag":"2130640900"},"language":"en","primary_location":{"id":"doi:10.1109/tasl.2009.2017438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2009.2017438","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112776516","display_name":"Selina Chu","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Selina Chu","raw_affiliation_strings":["Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","Department of Computer Science, University of Southern California, Los Angeles, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Department of Computer Science, University of Southern California, Los Angeles, CA#TAB#","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103555640","display_name":"Shrikanth Narayanan","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shrikanth Narayanan","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","Ming Hsieh Department of Electrical Engineering, Department of Computer Science, Signal and Image Processing Institute, University of Southern California, Los Angeles, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, Department of Computer Science, Signal and Image Processing Institute, University of Southern California, Los Angeles, CA#TAB#","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001082656","display_name":"C.\u2010C. Jay Kuo","orcid":"https://orcid.org/0000-0001-9474-5035"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C.-C. Jay Kuo","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","Ming Hsieh Department of Electrical Engineering, Department of Computer Science, Signal and Image Processing Institute, University of Southern California, Los Angeles, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, Department of Computer Science and Signal and Image Processing Institute, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, Department of Computer Science, Signal and Image Processing Institute, University of Southern California, Los Angeles, CA#TAB#","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112776516"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":28.5679,"has_fulltext":false,"cited_by_count":634,"citation_normalized_percentile":{"value":0.99900531,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"17","issue":"6","first_page":"1142","last_page":"1158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.8320705890655518},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7768762707710266},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6447750926017761},{"id":"https://openalex.org/keywords/environmental-noise","display_name":"Environmental noise","score":0.5843033790588379},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.574859082698822},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5034264922142029},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49667268991470337},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.4855920672416687},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48542818427085876},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.45976465940475464},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.4574216902256012},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.43303927779197693},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.2370789647102356},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.1919790804386139},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1442624032497406},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.06861227750778198}],"concepts":[{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.8320705890655518},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7768762707710266},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6447750926017761},{"id":"https://openalex.org/C86781634","wikidata":"https://www.wikidata.org/wiki/Q2478325","display_name":"Environmental noise","level":3,"score":0.5843033790588379},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.574859082698822},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5034264922142029},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49667268991470337},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.4855920672416687},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48542818427085876},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.45976465940475464},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.4574216902256012},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.43303927779197693},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.2370789647102356},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.1919790804386139},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1442624032497406},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.06861227750778198},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tasl.2009.2017438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2009.2017438","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.7300000190734863,"display_name":"Life in Land"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W78111734","https://openalex.org/W206961783","https://openalex.org/W1539282749","https://openalex.org/W1543582984","https://openalex.org/W1552898570","https://openalex.org/W1554663460","https://openalex.org/W1555402191","https://openalex.org/W1560013842","https://openalex.org/W1567662521","https://openalex.org/W1967769980","https://openalex.org/W1971039529","https://openalex.org/W1986931325","https://openalex.org/W2022554507","https://openalex.org/W2029049232","https://openalex.org/W2048174296","https://openalex.org/W2058189943","https://openalex.org/W2073295754","https://openalex.org/W2094240301","https://openalex.org/W2099732180","https://openalex.org/W2099799663","https://openalex.org/W2101789093","https://openalex.org/W2104680684","https://openalex.org/W2106339924","https://openalex.org/W2111730426","https://openalex.org/W2124864596","https://openalex.org/W2126109423","https://openalex.org/W2128659236","https://openalex.org/W2130576723","https://openalex.org/W2133824856","https://openalex.org/W2135342008","https://openalex.org/W2137343183","https://openalex.org/W2137561966","https://openalex.org/W2141660238","https://openalex.org/W2144345993","https://openalex.org/W2144609682","https://openalex.org/W2147174722","https://openalex.org/W2151693816","https://openalex.org/W2161036973","https://openalex.org/W2161755617","https://openalex.org/W2165409492","https://openalex.org/W2165733362","https://openalex.org/W2167716329","https://openalex.org/W2306347658","https://openalex.org/W6603154609","https://openalex.org/W6608422667","https://openalex.org/W6632519248","https://openalex.org/W6633152265","https://openalex.org/W6633241177","https://openalex.org/W6675827131","https://openalex.org/W6698306295"],"related_works":["https://openalex.org/W2475724061","https://openalex.org/W2773393136","https://openalex.org/W2174706483","https://openalex.org/W4317383455","https://openalex.org/W2548511587","https://openalex.org/W4293232884","https://openalex.org/W2422472940","https://openalex.org/W2019475500","https://openalex.org/W2548162870","https://openalex.org/W2138847091"],"abstract_inverted_index":{"The":[0,110,132],"paper":[1],"considers":[2],"the":[3,10,32,40,100,139,156],"task":[4],"of":[5,12,23,49,53,116,130,158],"recognizing":[6],"environmental":[7,148,164],"sounds":[8,52],"for":[9,28,92,118,147,162],"understanding":[11],"a":[13,60,114,123],"scene":[14],"or":[15],"context":[16],"surrounding":[17],"an":[18,88],"audio":[19,29,41,82,93],"sensor.":[20],"A":[21],"variety":[22],"features":[24,74,141,161],"have":[25,75],"been":[26,76],"proposed":[27],"recognition,":[30],"including":[31,167],"popular":[33],"Mel-frequency":[34],"cepstral":[35],"coefficients":[36],"(MFCCs)":[37],"which":[38,55],"describe":[39],"spectral":[42],"shape.":[43],"Environmental":[44],"sounds,":[45],"such":[46,80],"as":[47,184],"chirpings":[48],"insects":[50],"and":[51,96,126],"rain":[54],"are":[56,152],"typically":[57],"noise-like":[58],"with":[59],"broad":[61],"flat":[62],"spectrum,":[63],"may":[64],"include":[65],"strong":[66],"temporal":[67],"domain":[68],"signatures.":[69],"However,":[70],"only":[71],"few":[72],"temporal-domain":[73],"developed":[77],"to":[78,98,105,137,142,154,170,180],"characterize":[79],"diverse":[81],"signals":[83],"previously.":[84],"Here,":[85],"we":[86],"perform":[87],"empirical":[89],"feature":[90,119,134],"analysis":[91],"environment":[94],"characterization":[95],"propose":[97],"use":[99],"matching":[101],"pursuit":[102],"(MP)":[103],"algorithm":[104],"obtain":[106],"effective":[107],"time-frequency":[108],"features.":[109,131],"MP-based":[111,133],"method":[112],"utilizes":[113],"dictionary":[115],"atoms":[117],"selection,":[120],"resulting":[121],"in":[122],"flexible,":[124],"intuitive":[125],"physically":[127],"interpretable":[128],"set":[129],"is":[135],"adopted":[136],"supplement":[138],"MFCC":[140],"yield":[143],"higher":[144],"recognition":[145,173,176],"accuracy":[146],"sounds.":[149],"Extensive":[150],"experiments":[151],"conducted":[153],"demonstrate":[155],"effectiveness":[157],"these":[159],"joint":[160],"unstructured":[163],"sound":[165],"classification,":[166],"listening":[168],"tests":[169],"study":[171],"human":[172,185],"capabilities.":[174],"Our":[175],"system":[177],"has":[178],"shown":[179],"produce":[181],"comparable":[182],"performance":[183],"listeners.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":27},{"year":2024,"cited_by_count":33},{"year":2023,"cited_by_count":33},{"year":2022,"cited_by_count":48},{"year":2021,"cited_by_count":49},{"year":2020,"cited_by_count":38},{"year":2019,"cited_by_count":68},{"year":2018,"cited_by_count":37},{"year":2017,"cited_by_count":52},{"year":2016,"cited_by_count":38},{"year":2015,"cited_by_count":43},{"year":2014,"cited_by_count":43},{"year":2013,"cited_by_count":37},{"year":2012,"cited_by_count":36}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
