{"id":"https://openalex.org/W4285190837","doi":"https://doi.org/10.1109/taslp.2022.3178233","title":"Gestalt Principles Emerge When Learning Universal Sound Source Separation","display_name":"Gestalt Principles Emerge When Learning Universal Sound Source Separation","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4285190837","doi":"https://doi.org/10.1109/taslp.2022.3178233"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3178233","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3178233","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://mediatum.ub.tum.de/1661313","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038318511","display_name":"Han Li","orcid":"https://orcid.org/0000-0003-2298-332X"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["CN","DE"],"is_corresponding":true,"raw_author_name":"Han Li","raw_affiliation_strings":["School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","School of Marine Science and Technology, Northwestern Polytechnical University, 710072, Xi'an, China, and Audio Information Processing group, Dept. of Electrical and Computer Engineering, Technische Universitat Munchen, 80333 Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0003-2298-332X","affiliations":[{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, 710072, Xi'an, China, and Audio Information Processing group, Dept. of Electrical and Computer Engineering, Technische Universitat Munchen, 80333 Munich, Germany","institution_ids":["https://openalex.org/I17145004","https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007763993","display_name":"Kean Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kean Chen","raw_affiliation_strings":["School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","School of Marine Science and Technology, Northwestern Polytechnical University, 710072, Xi'an, China"],"raw_orcid":"https://orcid.org/0000-0001-5732-483X","affiliations":[{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, 710072, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031783799","display_name":"Bernhard U. Seeber","orcid":"https://orcid.org/0000-0002-3621-3409"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bernhard U. Seeber","raw_affiliation_strings":["Audio Information Processing group, Department of Electrical and Computer Engineering, Technische Universit&#x00E4;t M&#x00FC;nchen, Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3621-3409","affiliations":[{"raw_affiliation_string":"Audio Information Processing group, Department of Electrical and Computer Engineering, Technische Universit&#x00E4;t M&#x00FC;nchen, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038318511"],"corresponding_institution_ids":["https://openalex.org/I17145004","https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.1488,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.38567378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"30","issue":null,"first_page":"1877","last_page":"1891"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gestalt-psychology","display_name":"Gestalt psychology","score":0.7698274850845337},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.759635329246521},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6867926120758057},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.6599288582801819},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5287327766418457},{"id":"https://openalex.org/keywords/auditory-scene-analysis","display_name":"Auditory scene analysis","score":0.48682326078414917},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.4429788589477539},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.34923338890075684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3204226493835449},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.31432920694351196},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.19600704312324524},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16594380140304565},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1089375913143158},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10045880079269409},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07760888338088989}],"concepts":[{"id":"https://openalex.org/C27362006","wikidata":"https://www.wikidata.org/wiki/Q272021","display_name":"Gestalt psychology","level":3,"score":0.7698274850845337},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.759635329246521},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6867926120758057},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.6599288582801819},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5287327766418457},{"id":"https://openalex.org/C38129911","wikidata":"https://www.wikidata.org/wiki/Q4820038","display_name":"Auditory scene analysis","level":3,"score":0.48682326078414917},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.4429788589477539},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.34923338890075684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3204226493835449},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.31432920694351196},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.19600704312324524},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16594380140304565},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1089375913143158},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10045880079269409},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07760888338088989},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2022.3178233","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3178233","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:mediatum.ub.tum.de:node/1661313","is_oa":true,"landing_page_url":"https://mediatum.ub.tum.de/1661313","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:mediatum.ub.tum.de:node/1661313","is_oa":true,"landing_page_url":"https://mediatum.ub.tum.de/1661313","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G4733890132","display_name":null,"funder_award_id":"01 GQ 1004B","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G6275686549","display_name":null,"funder_award_id":"BMBF 01 GQ 1004B","funder_id":"https://openalex.org/F4320321115","funder_display_name":"Bernstein Center for Computational Neuroscience T\u00fcbingen"},{"id":"https://openalex.org/G7975114013","display_name":null,"funder_award_id":"BMBF 01 GQ 1004B","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320321115","display_name":"Bernstein Center for Computational Neuroscience T\u00fcbingen","ror":"https://ror.org/00n8pjn35"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W81589471","https://openalex.org/W83828064","https://openalex.org/W1494198834","https://openalex.org/W1550041424","https://openalex.org/W1968087518","https://openalex.org/W2003053291","https://openalex.org/W2017537431","https://openalex.org/W2018637815","https://openalex.org/W2030351702","https://openalex.org/W2032101280","https://openalex.org/W2035032437","https://openalex.org/W2037320486","https://openalex.org/W2037752682","https://openalex.org/W2041714538","https://openalex.org/W2059159496","https://openalex.org/W2063911473","https://openalex.org/W2068742381","https://openalex.org/W2072201195","https://openalex.org/W2079080230","https://openalex.org/W2083660934","https://openalex.org/W2088769724","https://openalex.org/W2097191389","https://openalex.org/W2105062502","https://openalex.org/W2123157731","https://openalex.org/W2130198788","https://openalex.org/W2139239037","https://openalex.org/W2145084325","https://openalex.org/W2149425615","https://openalex.org/W2153646332","https://openalex.org/W2159796396","https://openalex.org/W2202261414","https://openalex.org/W2219249508","https://openalex.org/W2460742184","https://openalex.org/W2476684120","https://openalex.org/W2519091744","https://openalex.org/W2564079926","https://openalex.org/W2807432252","https://openalex.org/W2897371647","https://openalex.org/W2912263636","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2964058413","https://openalex.org/W2972460025","https://openalex.org/W2972541922","https://openalex.org/W2973253474","https://openalex.org/W2998657200","https://openalex.org/W3015199127","https://openalex.org/W3023341514","https://openalex.org/W3095717210","https://openalex.org/W3099330747","https://openalex.org/W3161003723","https://openalex.org/W3199092278","https://openalex.org/W4200464755","https://openalex.org/W4226064907","https://openalex.org/W4240683299","https://openalex.org/W4256399001","https://openalex.org/W6688816777"],"related_works":["https://openalex.org/W1509813908","https://openalex.org/W2031820693","https://openalex.org/W1910172735","https://openalex.org/W2107364365","https://openalex.org/W2118307209","https://openalex.org/W2113403277","https://openalex.org/W2137288760","https://openalex.org/W2889447638","https://openalex.org/W2539388437","https://openalex.org/W1785857632"],"abstract_inverted_index":{"Sound":[0],"source":[1,154],"separation":[2,28,46,53,100,122,155],"is":[3,12,31,160],"an":[4,14],"essential":[5],"aspect":[6],"in":[7,116,123,133,144],"auditory":[8,64,89,126,185],"scene":[9,90,186],"analysis,":[10],"which":[11,110],"still":[13],"urgent":[15],"challenge":[16],"for":[17,33,76,121,180],"machine":[18],"hearing.":[19],"In":[20],"this":[21],"paper,":[22],"a":[23,60],"fully":[24],"convolutional":[25],"time-domain":[26],"audio":[27],"network":[29,75,156],"(ConvTasNet)":[30],"trained":[32],"universal":[34,152],"two-source":[35],"separation,":[36],"consisting":[37],"of":[38,48,62,88,184],"speech,":[39],"environmental":[40],"sounds,":[41],"and":[42,78,139,146,166],"music.":[43],"Besides":[44],"the":[45,49,51,70,74,94,99,124,182],"performance":[47],"network,":[50,95],"underlying":[52],"mechanisms":[54,101],"are":[55,106,128],"our":[56,134],"main":[57],"concern.":[58],"Through":[59],"series":[61],"classic":[63],"segregation":[65],"experiments,":[66],"we":[67],"systematically":[68],"explore":[69],"principles":[71,120,159],"learned":[72],"by":[73],"simultaneous":[77],"sequential":[79],"organization.":[80],"The":[81,118,151],"results":[82],"show":[83],"that":[84,105],"without":[85],"prior":[86],"knowledge":[87],"analysis":[91],"imparted":[92],"on":[93],"it":[96],"spontaneously":[97],"learns":[98],"from":[102],"raw":[103],"waveforms":[104],"similar":[107],"to":[108,130,163,170],"those":[109],"have":[111],"developed":[112],"over":[113],"many":[114],"years":[115],"humans.":[117],"Gestalt":[119,158],"human":[125,175],"system":[127],"shown":[129],"be":[131,168],"effective":[132],"network:":[135],"harmonicity,":[136],"onset":[137],"synchrony":[138],"common":[140],"fate":[141],"(coherent":[142],"modulation":[143],"amplitude":[145],"frequency),":[147],"proximity,":[148],"continuity,":[149],"similarity.":[150],"sound":[153],"following":[157],"not":[161],"limited":[162],"specific":[164],"sources":[165],"can":[167],"applied":[169],"various":[171],"acoustic":[172],"situations":[173],"like":[174],"hearing,":[176],"providing":[177],"new":[178],"directions":[179],"solving":[181],"problem":[183],"analysis.":[187]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
