{"id":"https://openalex.org/W4392902665","doi":"https://doi.org/10.1109/icassp48485.2024.10447183","title":"Exploring Large Scale Pre-Trained Models for Robust Machine Anomalous Sound Detection","display_name":"Exploring Large Scale Pre-Trained Models for Robust Machine Anomalous Sound Detection","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902665","doi":"https://doi.org/10.1109/icassp48485.2024.10447183"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447183","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447183","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100690517","display_name":"Bing Han","orcid":"https://orcid.org/0000-0002-6319-6755"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bing Han","raw_affiliation_strings":["Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102915022","display_name":"Zhiqiang Lv","orcid":"https://orcid.org/0000-0003-2875-6486"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiqiang Lv","raw_affiliation_strings":["Huakong AI Plus Company Limited,Beijing,China","Huakong AI Plus Company Limited, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Huakong AI Plus Company Limited,Beijing,China","institution_ids":[]},{"raw_affiliation_string":"Huakong AI Plus Company Limited, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032861798","display_name":"Anbai Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Anbai Jiang","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004675212","display_name":"Wen Huang","orcid":"https://orcid.org/0000-0001-7682-4354"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Huang","raw_affiliation_strings":["Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101416769","display_name":"Zhengyang Chen","orcid":"https://orcid.org/0000-0003-1293-8146"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengyang Chen","raw_affiliation_strings":["Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018186116","display_name":"Yufeng Deng","orcid":"https://orcid.org/0000-0003-2153-8494"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yufeng Deng","raw_affiliation_strings":["Huakong AI Plus Company Limited,Beijing,China","Huakong AI Plus Company Limited, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Huakong AI Plus Company Limited,Beijing,China","institution_ids":[]},{"raw_affiliation_string":"Huakong AI Plus Company Limited, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101824767","display_name":"Jiawei Ding","orcid":"https://orcid.org/0000-0001-7417-7085"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiawei Ding","raw_affiliation_strings":["Huakong AI Plus Company Limited,Beijing,China","Huakong AI Plus Company Limited, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Huakong AI Plus Company Limited,Beijing,China","institution_ids":[]},{"raw_affiliation_string":"Huakong AI Plus Company Limited, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007660467","display_name":"Cheng L\u00fc","orcid":"https://orcid.org/0000-0003-1746-7697"},"institutions":[{"id":"https://openalex.org/I153473198","display_name":"North China Electric Power University","ror":"https://ror.org/04qr5t414","country_code":"CN","type":"education","lineage":["https://openalex.org/I153473198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Lu","raw_affiliation_strings":["North China Electric Power University,School of Economics and Management,Beijing,China","School of Economics and Management, North China Electric Power University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"North China Electric Power University,School of Economics and Management,Beijing,China","institution_ids":["https://openalex.org/I153473198"]},{"raw_affiliation_string":"School of Economics and Management, North China Electric Power University, Beijing, China","institution_ids":["https://openalex.org/I153473198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100692904","display_name":"Wei-Qiang Zhang","orcid":"https://orcid.org/0000-0003-3841-1959"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei-Qiang Zhang","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079233004","display_name":"Pingyi Fan","orcid":"https://orcid.org/0000-0002-0658-6079"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pingyi Fan","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100409741","display_name":"Jia Liu","orcid":"https://orcid.org/0000-0003-0383-0934"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Liu","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Auditory Cognition and Computational Acoustics Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5100690517"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":8.6258,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.98463433,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1326","last_page":"1330"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8156046867370605},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6757173538208008},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6399096250534058},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6256715059280396},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5175725221633911},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4953221380710602},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4917047619819641},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4916148781776428},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4855024814605713},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44775858521461487},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33445507287979126},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.226394385099411},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06775185465812683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8156046867370605},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6757173538208008},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6399096250534058},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6256715059280396},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5175725221633911},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4953221380710602},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4917047619819641},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4916148781776428},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4855024814605713},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44775858521461487},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33445507287979126},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.226394385099411},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06775185465812683},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447183","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447183","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","display_name":"Climate action","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2407080277","https://openalex.org/W2896457183","https://openalex.org/W2969985801","https://openalex.org/W2979476256","https://openalex.org/W3015356122","https://openalex.org/W3092451850","https://openalex.org/W3162150435","https://openalex.org/W3206189675","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4224917958","https://openalex.org/W4253461361","https://openalex.org/W4372260191","https://openalex.org/W4372260505","https://openalex.org/W4385823104","https://openalex.org/W4388117975","https://openalex.org/W4399597916","https://openalex.org/W6780218876","https://openalex.org/W6784066080","https://openalex.org/W6788335241"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W2953234277","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866"],"abstract_inverted_index":{"Machine":[0],"anomalous":[1,38,150],"sound":[2,39,151],"detection":[3,40],"is":[4],"a":[5,35,82,101,133],"useful":[6],"technique":[7],"for":[8,75,142],"various":[9],"applications,":[10],"but":[11],"it":[12],"often":[13],"suffers":[14],"from":[15,59],"poor":[16],"generalization":[17,145],"due":[18],"to":[19,56,94],"the":[20,57,63,69,73,92,96,107,114,123,137,144],"challenges":[21],"of":[22,62,91,109,139,148,155],"data":[23,83],"collection":[24],"and":[25,66,128,146],"complex":[26],"acoustic":[27],"environment.":[28],"To":[29],"address":[30],"this":[31],"issue,":[32],"we":[33,52,80,99,157],"propose":[34],"robust":[36],"machine":[37,93,149],"model":[41,65],"that":[42,86,105,118],"leverages":[43],"self-supervised":[44,76],"pre-trained":[45,64],"models":[46],"on":[47,113],"large-scale":[48,140],"speech":[49],"data.":[50],"Specifically,":[51],"assign":[53],"different":[54,60,88,110],"weights":[55],"features":[58,108],"layers":[61],"then":[67],"use":[68],"working":[70],"condition":[71],"as":[72],"label":[74],"classification":[77],"fine-tuning.":[78],"Moreover,":[79],"introduce":[81],"augmentation":[84],"method":[85,104,121],"simulates":[87],"operating":[89],"states":[90],"enrich":[95],"dataset.":[97],"Furthermore,":[98],"devise":[100],"transformer":[102],"pooling":[103],"fuses":[106],"segments.":[111],"Experiments":[112],"DCASE2023":[115],"dataset":[116],"show":[117],"our":[119],"proposed":[120],"outperforms":[122],"commonly":[124],"used":[125],"reconstruction-based":[126],"autoencoder":[127],"classification-based":[129],"convolutional":[130],"network":[131],"by":[132],"large":[134],"margin,":[135],"demonstrating":[136],"effectiveness":[138],"pre-training":[141],"enhancing":[143],"robustness":[147],"detection.":[152],"In":[153],"Task2":[154],"DCASE2023,":[156],"achieve":[158],"2nd":[159],"place":[160],"with":[161],"these":[162],"methods.":[163]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
