{"id":"https://openalex.org/W4313187207","doi":"https://doi.org/10.1109/mmsp55362.2022.9949512","title":"Domestic Activity Clustering from Audio via Depthwise Separable Convolutional Autoencoder Network","display_name":"Domestic Activity Clustering from Audio via Depthwise Separable Convolutional Autoencoder Network","publication_year":2022,"publication_date":"2022-09-26","ids":{"openalex":"https://openalex.org/W4313187207","doi":"https://doi.org/10.1109/mmsp55362.2022.9949512"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp55362.2022.9949512","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp55362.2022.9949512","pdf_url":null,"source":{"id":"https://openalex.org/S4363605768","display_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070863631","display_name":"Yanxiong Li","orcid":"https://orcid.org/0000-0003-4362-1125"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanxiong Li","raw_affiliation_strings":["School of Electronic and Information Engineering, South China, University of Technology,Guangzhou,China","School of Electronic and Information Engineering, South China, University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China, University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"School of Electronic and Information Engineering, South China, University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055239917","display_name":"Wenchang Cao","orcid":"https://orcid.org/0009-0008-2790-2983"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenchang Cao","raw_affiliation_strings":["School of Electronic and Information Engineering, South China, University of Technology,Guangzhou,China","School of Electronic and Information Engineering, South China, University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China, University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"School of Electronic and Information Engineering, South China, University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108358814","display_name":"Konstantinos Drossos","orcid":null},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Konstantinos Drossos","raw_affiliation_strings":["Audio Research Group, Tampere University,Tampere,Finland","Audio Research Group, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Audio Research Group, Tampere University,Tampere,Finland","institution_ids":["https://openalex.org/I166825849"]},{"raw_affiliation_string":"Audio Research Group, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049691461","display_name":"Tuomas Virtanen","orcid":"https://orcid.org/0000-0002-4604-9729"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Tuomas Virtanen","raw_affiliation_strings":["Audio Research Group, Tampere University,Tampere,Finland","Audio Research Group, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Audio Research Group, Tampere University,Tampere,Finland","institution_ids":["https://openalex.org/I166825849"]},{"raw_affiliation_string":"Audio Research Group, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5070863631"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.4909,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.62053712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8456690311431885},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8266503214836121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7504510879516602},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5464732050895691},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.52176433801651},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4339294731616974},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35935646295547485},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.20037931203842163}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8456690311431885},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8266503214836121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7504510879516602},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5464732050895691},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.52176433801651},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4339294731616974},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35935646295547485},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.20037931203842163}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp55362.2022.9949512","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp55362.2022.9949512","pdf_url":null,"source":{"id":"https://openalex.org/S4363605768","display_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.75}],"awards":[{"id":"https://openalex.org/G4542277357","display_name":null,"funder_award_id":"62111530145,61771200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1601124178","https://openalex.org/W1970088388","https://openalex.org/W2037603696","https://openalex.org/W2128437937","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2569249728","https://openalex.org/W2617512665","https://openalex.org/W2765741717","https://openalex.org/W2775794021","https://openalex.org/W2799400564","https://openalex.org/W2890718983","https://openalex.org/W2896862445","https://openalex.org/W2963163009","https://openalex.org/W2964074409","https://openalex.org/W2980689481","https://openalex.org/W3015384913","https://openalex.org/W3015530480","https://openalex.org/W3016227692","https://openalex.org/W3034892239","https://openalex.org/W3036237438","https://openalex.org/W3083274258","https://openalex.org/W3091490309","https://openalex.org/W3098136481","https://openalex.org/W3104924861","https://openalex.org/W3117314925","https://openalex.org/W3160061063","https://openalex.org/W3171617011","https://openalex.org/W4206364333","https://openalex.org/W4210634873","https://openalex.org/W4238186852","https://openalex.org/W4288325991","https://openalex.org/W4298091485","https://openalex.org/W6631190155","https://openalex.org/W6635946123","https://openalex.org/W6685380521","https://openalex.org/W6763665168","https://openalex.org/W6775751516","https://openalex.org/W6779341498","https://openalex.org/W6779574021"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2145836866","https://openalex.org/W2803255133","https://openalex.org/W2909431601"],"abstract_inverted_index":{"Automatic":[0],"estimation":[1],"of":[2,32,40,55,72,121,133,150,158,166,178,185],"domestic":[3,33,41,56,73],"activities":[4],"from":[5,36],"audio":[6,47],"can":[7],"be":[8],"used":[9,125],"to":[10,45,51,103],"solve":[11],"many":[12],"problems,":[13],"such":[14],"as":[15],"reducing":[16],"the":[17,22,30,52,84,92,122,127,144,153],"labor":[18],"cost":[19],"for":[20],"nursing":[21],"elderly":[23],"people.":[24],"This":[25],"study":[26],"focuses":[27],"on":[28,115,129],"solving":[29],"problem":[31],"activity":[34,42,57,74],"clustering":[35,43,75,154],"audio.":[37],"The":[38],"target":[39],"is":[44,101,181],"cluster":[46,60,109],"clips":[48],"which":[49],"belong":[50],"same":[53],"category":[54],"into":[58],"one":[59],"in":[61,126,139,164],"an":[62],"unsupervised":[63],"way.":[64],"In":[65,83,170],"this":[66],"paper,":[67],"we":[68],"propose":[69],"a":[70,77,98,116],"method":[71,142,180],"using":[76],"depthwise":[78,93],"separable":[79,94],"convolutional":[80,95],"autoencoder":[81],"network.":[82],"proposed":[85],"method,":[86],"initial":[87],"embeddings":[88],"are":[89,113],"learned":[90],"by":[91],"autoencoder,":[96],"and":[97,108,131,136,152,160,168,175],"clustering-oriented":[99],"loss":[100],"designed":[102],"jointly":[104],"optimize":[105],"embedding":[106],"refinement":[107],"assignment.":[110],"Different":[111],"methods":[112,163],"evaluated":[114],"public":[117],"dataset":[118],"(a":[119],"derivative":[120],"SINS":[123],"dataset)":[124],"challenge":[128],"Detection":[130],"Classification":[132],"Acoustic":[134],"Scenes":[135],"Events":[137],"(DCASE)":[138],"2018.":[140],"Our":[141],"obtains":[143],"normalized":[145],"mutual":[146],"information":[147],"(NMI)":[148],"score":[149,157],"54.46%,":[151],"accuracy":[155],"(CA)":[156],"63.64%,":[159],"outperforms":[161],"state-of-the-art":[162],"terms":[165],"NMI":[167],"CA.":[169],"addition,":[171],"both":[172],"computational":[173],"complexity":[174],"memory":[176],"requirement":[177],"our":[179],"lower":[182],"than":[183],"that":[184],"previous":[186],"deep-model-based":[187],"methods.":[188],"Codes:":[189],"https://github.com/vinceasvp/domestic-activity-clustering-from-audio":[190]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
