{"id":"https://openalex.org/W2109016456","doi":"https://doi.org/10.1109/icassp.2005.1415594","title":"Unsupervised Auditory Scene Categorization via Key Audio Effects and Information-Theoretic Co-Clustering","display_name":"Unsupervised Auditory Scene Categorization via Key Audio Effects and Information-Theoretic Co-Clustering","publication_year":2006,"publication_date":"2006-10-11","ids":{"openalex":"https://openalex.org/W2109016456","doi":"https://doi.org/10.1109/icassp.2005.1415594","mag":"2109016456"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2005.1415594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2005.1415594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. (ICASSP '05). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069272117","display_name":"Rui Cai","orcid":"https://orcid.org/0000-0002-6499-2091"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Cai","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110491600","display_name":"Lie Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lie Lu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053661520","display_name":"Lianhong Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lian-Hong Cai","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5069272117"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.954,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.74611592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2","issue":null,"first_page":"1073","last_page":"1076"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8028271198272705},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7880180478096008},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6466522216796875},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6451015472412109},{"id":"https://openalex.org/keywords/auditory-scene-analysis","display_name":"Auditory scene analysis","score":0.5869110822677612},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5768762826919556},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5736637115478516},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5570566654205322},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5209720730781555},{"id":"https://openalex.org/keywords/auditory-display","display_name":"Auditory display","score":0.460094153881073},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.45069438219070435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42273175716400146},{"id":"https://openalex.org/keywords/scene-statistics","display_name":"Scene statistics","score":0.4194175899028778},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.41499656438827515},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.18614649772644043},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.12013241648674011}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8028271198272705},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7880180478096008},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6466522216796875},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6451015472412109},{"id":"https://openalex.org/C38129911","wikidata":"https://www.wikidata.org/wiki/Q4820038","display_name":"Auditory scene analysis","level":3,"score":0.5869110822677612},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5768762826919556},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5736637115478516},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5570566654205322},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5209720730781555},{"id":"https://openalex.org/C171179263","wikidata":"https://www.wikidata.org/wiki/Q4820026","display_name":"Auditory display","level":2,"score":0.460094153881073},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.45069438219070435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42273175716400146},{"id":"https://openalex.org/C197654239","wikidata":"https://www.wikidata.org/wiki/Q7430757","display_name":"Scene statistics","level":3,"score":0.4194175899028778},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.41499656438827515},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.18614649772644043},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.12013241648674011},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2005.1415594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2005.1415594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. (ICASSP '05). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1585610988","https://openalex.org/W1940377967","https://openalex.org/W1985593448","https://openalex.org/W1999956262","https://openalex.org/W2058189943","https://openalex.org/W2124000984","https://openalex.org/W2127097022","https://openalex.org/W2133576408","https://openalex.org/W2139855016","https://openalex.org/W2434205482","https://openalex.org/W4243306052","https://openalex.org/W4255387915","https://openalex.org/W6635035540","https://openalex.org/W6650430342","https://openalex.org/W6678599822","https://openalex.org/W6678973112"],"related_works":["https://openalex.org/W2109586375","https://openalex.org/W1986440968","https://openalex.org/W2071302177","https://openalex.org/W4311804456","https://openalex.org/W2168616527","https://openalex.org/W4365135359","https://openalex.org/W2573606541","https://openalex.org/W2582543502","https://openalex.org/W2891061472","https://openalex.org/W4243894421"],"abstract_inverted_index":{"Automatic":[0],"categorization":[1],"of":[2],"auditory":[3,27,35,64,72,100,105],"scenes":[4,28,36,73,106],"is":[5,23,68,86],"very":[6,113],"useful":[7],"in":[8],"various":[9],"content-based":[10],"multimedia":[11],"applications,":[12],"such":[13],"as":[14],"video":[15],"indexing":[16],"and":[17,56,74,98],"context-aware":[18],"computing.":[19],"An":[20],"unsupervised":[21],"approach":[22],"proposed":[24],"to":[25,48,70,88],"group":[26,71],"with":[29],"similar":[30],"semantics.":[31],"In":[32,46,79],"our":[33],"approach,":[34],"are":[37],"described":[38],"by":[39],"the":[40,50,95,99],"key":[41,75,96],"audio":[42,54,76,110],"effects":[43,55,77,97],"they":[44],"contain.":[45],"order":[47],"exploit":[49],"relationships":[51],"between":[52],"different":[53],"provide":[57],"a":[58,81],"more":[59],"accurate":[60],"similarity":[61],"measure":[62],"for":[63,93],"scene":[65],"categorization,":[66],"co-clustering":[67],"used":[69,87],"simultaneously.":[78],"addition,":[80],"Bayesian":[82],"information":[83],"criterion":[84],"(BIC)":[85],"select":[89],"cluster":[90],"numbers":[91],"automatically":[92],"both":[94],"scenes.":[101],"Evaluation":[102],"on":[103],"272":[104],"extracted":[107],"from":[108],"12-hour":[109],"data":[111],"shows":[112],"encouraging":[114],"results.":[115]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
