{"id":"https://openalex.org/W4283693214","doi":"https://doi.org/10.21437/interspeech.2022-61","title":"Domain Generalization with Relaxed Instance Frequency-wise Normalization for Multi-device Acoustic Scene Classification","display_name":"Domain Generalization with Relaxed Instance Frequency-wise Normalization for Multi-device Acoustic Scene Classification","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4283693214","doi":"https://doi.org/10.21437/interspeech.2022-61"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-61","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-61","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023646340","display_name":"Byeonggeun Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Byeonggeun Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012112492","display_name":"Seung-Han Yang","orcid":"https://orcid.org/0000-0001-7842-0492"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seunghan Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101905020","display_name":"Jangho Kim","orcid":"https://orcid.org/0000-0003-1334-4649"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jangho Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036697796","display_name":"Hyunsin Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyunsin Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056497425","display_name":"Juntae Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juntae Lee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5112998078","display_name":"Simyung Chang","orcid":"https://orcid.org/0000-0001-7750-191X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Simyung Chang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5023646340"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.4394,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.91719144,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2393","last_page":"2397"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7936149835586548},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7916483879089355},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7766484022140503},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.611402153968811},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6019667387008667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5713427066802979},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5598195791244507},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5439656972885132},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4980587959289551},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.47874516248703003},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.46746826171875},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.24863839149475098},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.20743903517723083}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7936149835586548},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7916483879089355},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7766484022140503},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.611402153968811},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6019667387008667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5713427066802979},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5598195791244507},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5439656972885132},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4980587959289551},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.47874516248703003},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.46746826171875},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.24863839149475098},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.20743903517723083},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-61","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-61","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W1617617605","https://openalex.org/W2761785940","https://openalex.org/W2129933262","https://openalex.org/W2565656575"],"abstract_inverted_index":{"While":[0],"using":[1,17],"two-dimensional":[2],"convolutional":[3],"neural":[4],"networks":[5,97],"(2D-CNNs)":[6],"in":[7,41,47,80],"image":[8,34],"processing,":[9,35],"it":[10],"is":[11,45,141],"possible":[12],"to":[13,28,96,102],"manipulate":[14],"domain":[15,78,104],"information":[16,40],"channel":[18,52],"statistics,":[19],"and":[20,111,139],"instance":[21],"normalization":[22,68],"has":[23],"been":[24],"a":[25,65,136],"promising":[26],"way":[27],"get":[29],"domain-invariant":[30],"features.":[31],"However,":[32],"unlike":[33],"we":[36,58],"analyze":[37],"that":[38],"domain-relevant":[39],"an":[42,81,142],"audio":[43,82,117],"feature":[44,83],"dominant":[46],"frequency":[48,72],"statistics":[49],"rather":[50],"than":[51],"statistics.":[53],"Motivated":[54],"by":[55],"our":[56,146],"analysis,":[57],"introduce":[59],"Relaxed":[60],"Instance":[61],"Frequency-wise":[62],"Normalization":[63],"(RFN):":[64],"plug-and-play,":[66],"explicit":[67],"module":[69],"along":[70],"the":[71,120,124],"axis":[73],"which":[74],"can":[75],"eliminate":[76],"instance-specific":[77],"discrepancy":[79],"while":[84],"relaxing":[85],"undesirable":[86],"loss":[87],"of":[88,145],"useful":[89],"discriminative":[90],"information.":[91],"Empirically,":[92],"simply":[93],"adding":[94],"RFN":[95,122,140],"shows":[98],"clear":[99,137],"margins":[100],"compared":[101],"previous":[103],"generalization":[105],"approaches":[106],"on":[107],"acoustic":[108,129],"scene":[109,130],"classification":[110,131],"yields":[112],"improved":[113],"robustness":[114],"for":[115],"multiple":[116,133],"devices.":[118],"Especially,":[119],"proposed":[121],"won":[123],"DCASE2021":[125],"challenge":[126],"TASK1A,":[127],"low-complexity":[128],"with":[132,135],"devices,":[134],"margin,":[138],"extended":[143],"work":[144],"technical":[147],"report.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
