{"id":"https://openalex.org/W2972909277","doi":"https://doi.org/10.21437/interspeech.2019-1982","title":"RawNet: Advanced End-to-End Deep Neural Network Using Raw Waveforms for Text-Independent Speaker Verification","display_name":"RawNet: Advanced End-to-End Deep Neural Network Using Raw Waveforms for Text-Independent Speaker Verification","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972909277","doi":"https://doi.org/10.21437/interspeech.2019-1982","mag":"2972909277"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-1982","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jee-weon Jung","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070613375","display_name":"Hee-Soo Heo","orcid":"https://orcid.org/0000-0003-1567-123X"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hee-Soo Heo","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101866564","display_name":"Ju-ho Kim","orcid":"https://orcid.org/0009-0001-4055-4564"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ju-ho Kim","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103211980","display_name":"Hye-jin Shim","orcid":null},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hye-jin Shim","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030475312","display_name":"Ha-Jin Yu","orcid":"https://orcid.org/0000-0003-3657-0665"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Ha-Jin Yu","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030475312"],"corresponding_institution_ids":["https://openalex.org/I124633538"],"apc_list":null,"apc_paid":null,"fwci":12.7324,"has_fulltext":false,"cited_by_count":158,"citation_normalized_percentile":{"value":0.98935127,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1268","last_page":"1272"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.8461177349090576},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.8386412858963013},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7446922063827515},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5994340777397156},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5677189826965332},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.5438182950019836},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4302116334438324},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4114055335521698},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.35530510544776917},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1524195373058319}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.8461177349090576},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.8386412858963013},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7446922063827515},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5994340777397156},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5677189826965332},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.5438182950019836},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4302116334438324},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4114055335521698},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.35530510544776917},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1524195373058319},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-1982","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"},{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G7779203582","display_name":null,"funder_award_id":"10076583","funder_id":"https://openalex.org/F4320321681","funder_display_name":"Ministry of Trade, Industry and Energy"},{"id":"https://openalex.org/G8582691448","display_name":null,"funder_award_id":"MOTIE, Korea","funder_id":"https://openalex.org/F4320321681","funder_display_name":"Ministry of Trade, Industry and Energy"},{"id":"https://openalex.org/G992484961","display_name":null,"funder_award_id":"Korea","funder_id":"https://openalex.org/F4320321681","funder_display_name":"Ministry of Trade, Industry and Energy"}],"funders":[{"id":"https://openalex.org/F4320321681","display_name":"Ministry of Trade, Industry and Energy","ror":"https://ror.org/008nkqk13"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1542280630","https://openalex.org/W1548442879","https://openalex.org/W1836465849","https://openalex.org/W1924770834","https://openalex.org/W2049244943","https://openalex.org/W2116261113","https://openalex.org/W2150769028","https://openalex.org/W2157331557","https://openalex.org/W2168249605","https://openalex.org/W2194775991","https://openalex.org/W2212703438","https://openalex.org/W2271840356","https://openalex.org/W2302255633","https://openalex.org/W2398826216","https://openalex.org/W2404380129","https://openalex.org/W2406312423","https://openalex.org/W2520774990","https://openalex.org/W2592641653","https://openalex.org/W2593179621","https://openalex.org/W2726515241","https://openalex.org/W2747900149","https://openalex.org/W2770454110","https://openalex.org/W2785523195","https://openalex.org/W2794506738","https://openalex.org/W2887814324","https://openalex.org/W2889151164","https://openalex.org/W2890964092","https://openalex.org/W2953384591","https://openalex.org/W2973149080","https://openalex.org/W4289547844","https://openalex.org/W4289750118"],"related_works":["https://openalex.org/W1521299571","https://openalex.org/W2972908178","https://openalex.org/W3096066489","https://openalex.org/W2972909277","https://openalex.org/W4380906377","https://openalex.org/W3141593045","https://openalex.org/W2806010980","https://openalex.org/W2134501921","https://openalex.org/W3048257020","https://openalex.org/W204267554"],"abstract_inverted_index":{"Recently,":[0],"direct":[1],"modeling":[2],"of":[3,17,26,68,90],"raw":[4,27,47],"waveforms":[5,28,48],"using":[6,71],"deep":[7,42,135],"neural":[8,43,136],"networks":[9,44],"has":[10],"been":[11],"widely":[12],"studied":[13],"for":[14,140,148],"a":[15,72,80],"number":[16],"tasks":[18],"in":[19,30,83],"audio":[20],"domains.In":[21],"speaker":[22,54,77,92,123,142],"verification,":[23],"however,":[24],"utilization":[25],"is":[29,171],"its":[31],"preliminary":[32],"phase,":[33,111],"requiring":[34],"further":[35],"investigation.In":[36],"this":[37],"study,":[38],"we":[39],"explore":[40],"end-to-end":[41],"that":[45,119,132,157,179],"input":[46],"to":[49,174],"improve":[50],"various":[51],"aspects:":[52],"front-end":[53],"embedding":[55,124,143],"extraction":[56,144],"including":[57],"model":[58,69,160],"architecture,":[59],"pre-training":[60,73],"scheme,":[61],"additional":[62],"objective":[63,85],"functions,":[64],"and":[65,107,145],"back-end":[66,116,149],"classification.Adjustment":[67],"architecture":[70],"scheme":[74],"can":[75],"extract":[76],"embeddings,":[78],"giving":[79],"significant":[81],"improvement":[82],"performance.Additional":[84],"functions":[86],"simplify":[87],"the":[88,108,121,146,153,158,175],"process":[89],"extracting":[91,99],"embeddings":[93],"by":[94],"merging":[95],"conventional":[96],"two-phase":[97],"processes:":[98],"utterance-level":[100,141],"features":[101],"such":[102],"as":[103],"i-vectors":[104],"or":[105],"x-vectors":[106],"feature":[109],"enhancement":[110],"e.g.,":[112],"linear":[113],"discriminant":[114],"analysis.Effective":[115],"classification":[117],"models":[118],"suit":[120],"proposed":[122,159,169],"are":[125],"also":[126,172],"explored.We":[127],"propose":[128],"an":[129],"end-toend":[130],"system":[131,170,178],"comprises":[133],"two":[134],"networks,":[137],"one":[138],"frontend":[139],"other":[147],"classification.Experiments":[150],"conducted":[151],"on":[152],"VoxCeleb1":[154],"dataset":[155],"demonstrate":[156],"achieves":[161],"state-of-the-art":[162,176],"performance":[163],"among":[164],"systems":[165],"without":[166],"data":[167,181],"augmentation.The":[168],"comparable":[173],"x-vector":[177],"adopts":[180],"augmentation.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":25},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":34},{"year":2021,"cited_by_count":27},{"year":2020,"cited_by_count":24},{"year":2019,"cited_by_count":3}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
