{"id":"https://openalex.org/W4390812911","doi":"https://doi.org/10.1109/taslp.2024.3353578","title":"Acoustic Scene Classification Across Cities and Devices via Feature Disentanglement","display_name":"Acoustic Scene Classification Across Cities and Devices via Feature Disentanglement","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4390812911","doi":"https://doi.org/10.1109/taslp.2024.3353578"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3353578","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3353578","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101555422","display_name":"Yizhou Tan","orcid":"https://orcid.org/0000-0002-4103-8115"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yizhou Tan","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074448137","display_name":"Haojun Ai","orcid":"https://orcid.org/0000-0002-4172-5070"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haojun Ai","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024084275","display_name":"Shengchen Li","orcid":"https://orcid.org/0000-0002-2488-298X"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengchen Li","raw_affiliation_strings":["Department of Intelligent Science, School of Advanced Technology, Xi&#x0027;an Jiaotong-Liverpool University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Department of Intelligent Science, School of Advanced Technology, Xi&#x0027;an Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066967599","display_name":"Mark D. Plumbley","orcid":"https://orcid.org/0000-0002-9708-1075"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark D. Plumbley","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101555422"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":6.831,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.97583722,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"32","issue":null,"first_page":"1286","last_page":"1297"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.7315959930419922},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6909819841384888},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6892040967941284},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5360531806945801},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43375518918037415},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.397530198097229},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10513338446617126}],"concepts":[{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.7315959930419922},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6909819841384888},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6892040967941284},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5360531806945801},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43375518918037415},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.397530198097229},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10513338446617126},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3353578","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3353578","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:alma.44SUR_INST:11200330970002346","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.8199999928474426,"display_name":"Sustainable cities and communities"}],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G479156690","display_name":null,"funder_award_id":"61971316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7697158467","display_name":"AI for Sound","funder_award_id":"EP/T019751/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8527798790","display_name":null,"funder_award_id":"62001038","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8857457899","display_name":null,"funder_award_id":"EP/T019751/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W96659543","https://openalex.org/W1852255964","https://openalex.org/W2095147901","https://openalex.org/W2097187469","https://openalex.org/W2103235956","https://openalex.org/W2104094955","https://openalex.org/W2131953535","https://openalex.org/W2163922914","https://openalex.org/W2194775991","https://openalex.org/W2201792562","https://openalex.org/W2214802144","https://openalex.org/W2511956680","https://openalex.org/W2531409750","https://openalex.org/W2593768305","https://openalex.org/W2617002574","https://openalex.org/W2619623002","https://openalex.org/W2758584002","https://openalex.org/W2786808285","https://openalex.org/W2787600432","https://openalex.org/W2798813225","https://openalex.org/W2820243394","https://openalex.org/W2885948040","https://openalex.org/W2902772991","https://openalex.org/W2962687275","https://openalex.org/W2969893028","https://openalex.org/W2987999870","https://openalex.org/W2997076748","https://openalex.org/W2998269197","https://openalex.org/W3005501015","https://openalex.org/W3015384913","https://openalex.org/W3015530480","https://openalex.org/W3034892239","https://openalex.org/W3094550259","https://openalex.org/W3098136481","https://openalex.org/W3104924861","https://openalex.org/W3141797743","https://openalex.org/W3163309075","https://openalex.org/W3180707540","https://openalex.org/W3184333189","https://openalex.org/W3189951784","https://openalex.org/W3206392049","https://openalex.org/W4200172872","https://openalex.org/W4214633806","https://openalex.org/W4283726207","https://openalex.org/W4288375019","https://openalex.org/W6639480849","https://openalex.org/W6683633756","https://openalex.org/W6688325169","https://openalex.org/W6725448924","https://openalex.org/W6752516136","https://openalex.org/W6753511437","https://openalex.org/W6761152316","https://openalex.org/W6763070779","https://openalex.org/W6774827837","https://openalex.org/W6779341498","https://openalex.org/W6839321029"],"related_works":["https://openalex.org/W3147584709","https://openalex.org/W2977677679","https://openalex.org/W1992327129","https://openalex.org/W2381986121","https://openalex.org/W2370918718","https://openalex.org/W2256933480","https://openalex.org/W2027854990","https://openalex.org/W2370081953","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Acoustic":[0],"Scene":[1],"Classification":[2],"(ASC)":[3],"is":[4,70,192],"a":[5,9,213],"task":[6],"that":[7,206],"classifies":[8],"scene":[10],"according":[11],"to":[12,53,61,72,122,212,235],"environmental":[13],"acoustic":[14],"signals.":[15],"Audios":[16],"collected":[17],"from":[18],"different":[19,219],"cities":[20,160],"and":[21,38,87,100,113,161,178,198,202,221],"devices":[22,79,220],"often":[23],"exhibit":[24],"biases":[25],"in":[26,146,152,158,195,232],"feature":[27,65,108,128,189,230],"distributions,":[28],"which":[29],"may":[30],"negatively":[31],"impact":[32],"ASC":[33,75,139,145,151,157,200,216,233],"performance.":[34,239],"Taking":[35],"the":[36,41,55,63,74,94,96,107,124,132,166,169,187,203,227],"city":[37,112],"device":[39],"of":[40,47,58,110,127,137,168,175,186,229],"audio":[42,56,208],"collection":[43],"as":[44],"two":[45],"types":[46],"data":[48],"domain,":[49],"this":[50],"paper":[51,224],"attempts":[52],"disentangle":[54],"features":[57,209],"each":[59,91],"domain":[60,118],"remove":[62],"related":[64],"biases.":[66],"A":[67],"dual-alignment":[68,133],"framework":[69],"proposed":[71,121,170,188],"generalize":[73],"system":[76,217],"on":[77,180],"new":[78,147,153,159,162],"or":[80],"cities,":[81],"by":[82],"aligning":[83],"boundaries":[84,89],"across":[85,218],"domains":[86],"decision":[88],"within":[90],"domain.":[92],"During":[93],"alignment,":[95],"maximum":[97],"classifier":[98],"discrepancy":[99],"gradient":[101],"reversed":[102],"layer":[103],"are":[104,120,141],"used":[105],"for":[106,226],"disentanglement":[109,190,231],"scene,":[111],"device,":[114],"while":[115],"four":[116],"candidate":[117],"classifiers":[119],"explore":[123],"optimal":[125],"solution":[126],"disentanglement.":[129],"To":[130],"evaluate":[131],"framework,":[134,171],"three":[135],"experiments":[136],"biased":[138,197],"tasks":[140],"designed:":[142],"1)":[143],"cross-city":[144],"cities;":[148],"2)":[149],"cross-device":[150],"devices;":[154],"3)":[155],"cross-city-device":[156],"devices.":[163],"Results":[164],"demonstrate":[165,205],"superiority":[167],"showcasing":[172],"performance":[173],"improvements":[174],"0.9%,":[176],"19.8%,":[177],"10.7%":[179],"classification":[181],"accuracy,":[182],"respectively.":[183],"The":[184],"effectiveness":[185],"approach":[191],"further":[193],"evaluated":[194],"both":[196],"unbiased":[199],"problems,":[201],"results":[204],"better-disentangled":[207],"can":[210],"lead":[211],"more":[214,237],"robust":[215],"cities.":[222],"This":[223],"advocates":[225],"integration":[228],"systems":[234],"achieve":[236],"reliable":[238]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
