{"id":"https://openalex.org/W2400549570","doi":"https://doi.org/10.1109/icassp.2016.7472622","title":"A deep scattering spectrum \u2014 Deep Siamese network pipeline for unsupervised acoustic modeling","display_name":"A deep scattering spectrum \u2014 Deep Siamese network pipeline for unsupervised acoustic modeling","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2400549570","doi":"https://doi.org/10.1109/icassp.2016.7472622","mag":"2400549570"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472622","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472622","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047639590","display_name":"Neil Zeghidour","orcid":"https://orcid.org/0000-0001-6896-3987"},"institutions":[{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"funder","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Neil Zeghidour","raw_affiliation_strings":["\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France"],"affiliations":[{"raw_affiliation_string":"\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France","institution_ids":["https://openalex.org/I29607241","https://openalex.org/I2746051580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041907084","display_name":"Gabriel Synnaeve","orcid":"https://orcid.org/0000-0003-1715-3356"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gabriel Synnaeve","raw_affiliation_strings":["Facebook A.I. Research, Paris, France"],"affiliations":[{"raw_affiliation_string":"Facebook A.I. Research, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055225582","display_name":"Maarten Versteegh","orcid":null},"institutions":[{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"funder","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Maarten Versteegh","raw_affiliation_strings":["\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France"],"affiliations":[{"raw_affiliation_string":"\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France","institution_ids":["https://openalex.org/I29607241","https://openalex.org/I2746051580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007620149","display_name":"Emmanuel Dupoux","orcid":"https://orcid.org/0000-0002-7814-2952"},"institutions":[{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"funder","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Dupoux","raw_affiliation_strings":["\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France"],"affiliations":[{"raw_affiliation_string":"\u00c9cole Normale Sup\u00e9rieure, PSL Research University, France","institution_ids":["https://openalex.org/I29607241","https://openalex.org/I2746051580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047639590"],"corresponding_institution_ids":["https://openalex.org/I2746051580","https://openalex.org/I29607241"],"apc_list":null,"apc_paid":null,"fwci":5.8582,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.96802343,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7054499983787537},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.601660966873169},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48799437284469604},{"id":"https://openalex.org/keywords/spectrum","display_name":"Spectrum (functional analysis)","score":0.4524742662906647},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.408814400434494},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.40777045488357544},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.3451218903064728},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.14396318793296814}],"concepts":[{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7054499983787537},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.601660966873169},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48799437284469604},{"id":"https://openalex.org/C156778621","wikidata":"https://www.wikidata.org/wiki/Q1365748","display_name":"Spectrum (functional analysis)","level":2,"score":0.4524742662906647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.408814400434494},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.40777045488357544},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.3451218903064728},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14396318793296814},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7472622","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472622","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W52412328","https://openalex.org/W1666984270","https://openalex.org/W1796128977","https://openalex.org/W2025482506","https://openalex.org/W2044138293","https://openalex.org/W2052697931","https://openalex.org/W2057007397","https://openalex.org/W2093231248","https://openalex.org/W2112688413","https://openalex.org/W2128160875","https://openalex.org/W2154833897","https://openalex.org/W2171590421","https://openalex.org/W2252172689","https://openalex.org/W2395899413","https://openalex.org/W2396043527","https://openalex.org/W2399576818","https://openalex.org/W2404799143","https://openalex.org/W2406349064","https://openalex.org/W2786608204","https://openalex.org/W2962701684","https://openalex.org/W2963620343","https://openalex.org/W6602180557","https://openalex.org/W6637061625","https://openalex.org/W6638159135","https://openalex.org/W6677154653","https://openalex.org/W6712202099","https://openalex.org/W6712444837","https://openalex.org/W6712553779","https://openalex.org/W6713745070","https://openalex.org/W6973666849"],"related_works":["https://openalex.org/W2731899572","https://openalex.org/W1595868330","https://openalex.org/W3215138031","https://openalex.org/W3009238340","https://openalex.org/W2939353110","https://openalex.org/W2992516105","https://openalex.org/W4286783850","https://openalex.org/W3025626553","https://openalex.org/W3090132503","https://openalex.org/W3126683096"],"abstract_inverted_index":{"Recent":[0],"work":[1],"has":[2],"explored":[3],"deep":[4],"architectures":[5,104],"for":[6,16],"learning":[7],"acoustic":[8],"features":[9,90,109],"in":[10,29,67],"an":[11,48],"unsupervised":[12,101],"or":[13],"weakly-supervised":[14,103],"way":[15],"phone":[17],"recognition.":[18],"Here":[19],"we":[20,31],"investigate":[21],"the":[22,25,68,80,111],"role":[23],"of":[24],"input":[26],"features,":[27,86],"and":[28,74,102],"particular":[30],"test":[32],"whether":[33],"standard":[34],"mel-scaled":[35],"filterbanks":[36,81],"could":[37],"be":[38],"replaced":[39,83],"by":[40,84],"inherently":[41],"richer":[42,108],"representations,":[43],"such":[44],"as":[45],"derived":[46],"from":[47,107],"analytic":[49],"scattering":[50,85],"spectrum.":[51],"We":[52],"use":[53],"a":[54,63,76],"Siamese":[55],"network":[56],"using":[57],"lexical":[58],"side":[59],"information":[60],"similar":[61,92],"to":[62],"well-performing":[64],"architecture":[65],"used":[66],"Zero":[69],"Resource":[70],"Speech":[71],"Challenge":[72],"(2015),":[73],"show":[75],"substantial":[77],"improvement":[78],"when":[79,94],"are":[82],"even":[87],"though":[88],"these":[89],"yield":[91],"performance":[93],"tested":[95],"without":[96],"training.":[97],"This":[98],"shows":[99],"that":[100],"can":[105],"benefit":[106],"than":[110],"traditional":[112],"ones.":[113]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
