{"id":"https://openalex.org/W2592641653","doi":"https://doi.org/10.1109/icassp.2017.7953080","title":"End-to-end spoofing detection with raw waveform CLDNNS","display_name":"End-to-end spoofing detection with raw waveform CLDNNS","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2592641653","doi":"https://doi.org/10.1109/icassp.2017.7953080","mag":"2592641653"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2017.7953080","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953080","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2007.13060","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080154283","display_name":"Heinrich Dinkel","orcid":"https://orcid.org/0000-0003-4330-8980"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Heinrich Dinkel","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071702566","display_name":"Nanxin Chen","orcid":"https://orcid.org/0000-0001-6698-1604"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nanxin Chen","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043098653","display_name":"Kai Yu","orcid":"https://orcid.org/0000-0002-7102-9826"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yu","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080154283"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":5.6079,"has_fulltext":false,"cited_by_count":67,"citation_normalized_percentile":{"value":0.96574207,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4860","last_page":"4864"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8423998355865479},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6577269434928894},{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.6575444340705872},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6493380069732666},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6368874311447144},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5900585055351257},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.5866904854774475},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.5221491456031799},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.507044792175293},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4848729074001312},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.48484933376312256},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48469021916389465},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4504585266113281},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42990052700042725},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.41692832112312317},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3454086184501648},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.09037280082702637},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.056111812591552734}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8423998355865479},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6577269434928894},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.6575444340705872},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6493380069732666},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6368874311447144},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5900585055351257},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.5866904854774475},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.5221491456031799},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.507044792175293},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4848729074001312},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48484933376312256},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48469021916389465},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4504585266113281},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42990052700042725},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.41692832112312317},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3454086184501648},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.09037280082702637},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.056111812591552734},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp.2017.7953080","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953080","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2007.13060","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2007.13060","pdf_url":"https://arxiv.org/pdf/2007.13060","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2007.13060","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2007.13060","pdf_url":"https://arxiv.org/pdf/2007.13060","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W118835214","https://openalex.org/W1006777433","https://openalex.org/W1507099578","https://openalex.org/W1542280630","https://openalex.org/W1600744878","https://openalex.org/W1638038541","https://openalex.org/W1972940559","https://openalex.org/W2095705004","https://openalex.org/W2128377844","https://openalex.org/W2147145909","https://openalex.org/W2194775991","https://openalex.org/W2295634712","https://openalex.org/W2303197844","https://openalex.org/W2398826216","https://openalex.org/W2401839215","https://openalex.org/W2405506115","https://openalex.org/W2406778302","https://openalex.org/W2533075636","https://openalex.org/W2565961899","https://openalex.org/W2962940899","https://openalex.org/W6600284362","https://openalex.org/W6630429238","https://openalex.org/W6674330103","https://openalex.org/W6687483927","https://openalex.org/W6697322189","https://openalex.org/W6712560600","https://openalex.org/W6712941293","https://openalex.org/W6713287686","https://openalex.org/W6713679595"],"related_works":["https://openalex.org/W4294437891","https://openalex.org/W4226389478","https://openalex.org/W4297792928","https://openalex.org/W4221161333","https://openalex.org/W3113108043","https://openalex.org/W2061278248","https://openalex.org/W4298202768","https://openalex.org/W1914348842","https://openalex.org/W2331222812","https://openalex.org/W2294013337"],"abstract_inverted_index":{"Albeit":[0],"recent":[1],"progress":[2],"in":[3,11,24,43],"speaker":[4],"verification":[5],"generates":[6],"powerful":[7,55],"models,":[8],"malicious":[9],"attacks":[10],"the":[12,100,108,128,133,140,152,163,171,178],"form":[13],"of":[14],"spoofed":[15],"speech,":[16],"are":[17,33],"generally":[18],"not":[19],"coped":[20],"with.":[21],"Recent":[22],"results":[23],"ASVSpoof2015":[25],"and":[26,79,112],"BTAS2016":[27,129],"challenges":[28,45],"indicate":[29],"that":[30,132,170],"spoof-aware":[31,48],"features":[32],"a":[34,54,62,76,114],"possible":[35],"solution":[36],"to":[37,84,162],"this":[38,58],"problem.":[39],"Most":[40],"successful":[41],"methods":[42],"both":[44],"focus":[46],"on":[47,53,107,127],"features,":[49],"rather":[50],"than":[51,120],"focusing":[52],"classifier.":[56],"In":[57],"paper":[59],"we":[60],"present":[61],"novel":[63],"raw":[64,142],"waveform":[65,143],"based":[66,123],"deep":[67],"model":[68],"for":[69,102],"spoofing":[70],"detection,":[71],"which":[72,98],"jointly":[73],"acts":[74],"as":[75,94],"feature":[77],"extractor":[78],"classifier,":[80,97],"thus":[81],"allowing":[82],"it":[83,168],"directly":[85],"classify":[86],"speech":[87],"signals.":[88],"This":[89],"approach":[90],"can":[91],"be":[92],"considered":[93],"an":[95],"end-to-end":[96],"removes":[99],"need":[101],"any":[103],"pre-":[104],"or":[105],"post-processing":[106],"data,":[109],"making":[110],"training":[111],"evaluation":[113],"streamlined":[115],"process,":[116],"consuming":[117],"less":[118],"time":[119],"other":[121],"neural-network":[122],"approaches.":[124],"The":[125],"experiments":[126],"dataset":[130],"show":[131],"system":[134,173],"performance":[135],"is":[136],"significantly":[137],"improved":[138],"by":[139],"proposed":[141,172],"convolutional":[144],"long":[145],"short":[146],"term":[147],"neural":[148],"network":[149],"(CLDNN),":[150],"from":[151],"previous":[153],"best":[154],"published":[155],"1.26%":[156],"half":[157],"total":[158],"error":[159],"rate":[160],"(HTER)":[161],"current":[164],"0.82%":[165],"HTER.":[166],"Moreover":[167],"shows":[169],"also":[174],"performs":[175],"well":[176],"under":[177],"unknown":[179],"(RE-PH2-PH3,RE-LPPH2-PH3)":[180],"conditions.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":8}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
