{"id":"https://openalex.org/W4283690675","doi":"https://doi.org/10.48550/arxiv.2206.13066","title":"Detection of Doctored Speech: Towards an End-to-End Parametric Learn-able Filter Approach","display_name":"Detection of Doctored Speech: Towards an End-to-End Parametric Learn-able Filter Approach","publication_year":2022,"publication_date":"2022-06-27","ids":{"openalex":"https://openalex.org/W4283690675","doi":"https://doi.org/10.48550/arxiv.2206.13066"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2206.13066","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.13066","pdf_url":"https://arxiv.org/pdf/2206.13066","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2206.13066","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103258089","display_name":"Rohit Arora","orcid":"https://orcid.org/0000-0002-4843-7303"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Arora, Rohit","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5103258089"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9724000096321106,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7767235040664673},{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.6093466281890869},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5618985295295715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5114144682884216},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5026092529296875},{"id":"https://openalex.org/keywords/wavelet-transform","display_name":"Wavelet transform","score":0.48311543464660645},{"id":"https://openalex.org/keywords/deconvolution","display_name":"Deconvolution","score":0.4294039011001587},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.42082738876342773},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4194238483905792},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.34924787282943726},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3227294087409973},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17422911524772644},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.13756322860717773}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7767235040664673},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.6093466281890869},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5618985295295715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5114144682884216},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5026092529296875},{"id":"https://openalex.org/C196216189","wikidata":"https://www.wikidata.org/wiki/Q2867","display_name":"Wavelet transform","level":3,"score":0.48311543464660645},{"id":"https://openalex.org/C174576160","wikidata":"https://www.wikidata.org/wiki/Q1183700","display_name":"Deconvolution","level":2,"score":0.4294039011001587},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.42082738876342773},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4194238483905792},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.34924787282943726},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3227294087409973},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17422911524772644},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.13756322860717773}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2206.13066","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.13066","pdf_url":"https://arxiv.org/pdf/2206.13066","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2206.13066","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2206.13066","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2206.13066","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.13066","pdf_url":"https://arxiv.org/pdf/2206.13066","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6600000262260437,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2766680336","https://openalex.org/W2133320490","https://openalex.org/W4289830142","https://openalex.org/W1497065097","https://openalex.org/W2125446021","https://openalex.org/W3150393935","https://openalex.org/W2980055100","https://openalex.org/W2186790562","https://openalex.org/W2548564146","https://openalex.org/W2756038079"],"abstract_inverted_index":{"The":[0,31,60,75,143,170,233],"Automatic":[1],"Speaker":[2],"Verification":[3],"systems":[4],"have":[5],"potential":[6],"in":[7,48,167,180,210],"biometrics":[8],"applications":[9],"for":[10,186],"logical":[11],"control":[12],"access":[13],"and":[14,41,92,105,126,138,148,156,174,206,238,244,275],"authentication.":[15],"A":[16],"lot":[17],"of":[18,38,72,85,87,177],"things":[19],"happen":[20],"to":[21,97,192,222,264],"be":[22],"at":[23,189],"stake":[24],"if":[25],"the":[26,39,78,83,88,131,135,163,175,187,198,217,223,228,265,272],"ASV":[27],"system":[28],"is":[29],"compromised.":[30],"preliminary":[32],"work":[33],"presents":[34],"a":[35,202],"comparative":[36],"analysis":[37],"wavelet":[40,140],"MFCC-based":[42],"state-of-the-art":[43],"spoof":[44],"detection":[45],"techniques":[46],"developed":[47],"these":[49],"papers,":[50],"respectively":[51,247],"(Novoselov":[52],"et":[53,57],"al.,":[54,58],"2016)":[55],"(Alam":[56],"2016a).":[59],"results":[61],"on":[62,77,162],"ASVspoof":[63,79,168,251],"2015":[64],"justify":[65],"our":[66,114,157,211],"inclination":[67],"towards":[68,99],"wavelet-based":[69],"features":[70,91,259,266],"instead":[71],"MFCC":[73],"features.":[74],"experiments":[76],"2019":[80,252],"database":[81],"show":[82],"lack":[84],"credibility":[86],"traditional":[89,153],"handcrafted":[90,154],"give":[93],"us":[94],"more":[95,106,257],"reason":[96],"progress":[98],"using":[100,231],"end-to-end":[101],"deep":[102,119],"neural":[103],"networks":[104],"recent":[107],"techniques.":[108],"We":[109,116],"use":[110],"Sincnet":[111,158,245],"architecture":[112],"as":[113,262,270],"baseline.":[115],"get":[117],"E2E":[118],"learning":[120],"models,":[121],"which":[122],"we":[123,196],"call":[124],"WSTnet":[125],"CWTnet,":[127],"respectively,":[128],"by":[129,268],"replacing":[130],"Sinc":[132],"layer":[133,200,209,215],"with":[134,201],"Wavelet":[136,203,219],"Scattering":[137],"Continuous":[139],"transform":[141],"layers.":[142],"fusion":[144],"model":[145,235],"achieved":[146,236],"62%":[147],"17%":[149],"relative":[150,240],"improvement":[151,241],"over":[152,242,250],"models":[155,246],"baseline":[159],"when":[160,248],"evaluated":[161,249],"modern":[164],"spoofing":[165],"attacks":[166],"2019.":[169],"final":[171],"scale":[172,229],"distribution":[173],"number":[176],"scales":[178],"used":[179],"CWTnet":[181,212,224,243,269],"are":[182,260,279],"far":[183],"from":[184],"optimal":[185],"task":[188],"hand.":[190],"So":[191],"solve":[193],"this":[194],"problem,":[195],"replaced":[197],"CWT":[199],"Deconvolution(WD)":[204],"(Khan":[205],"Yener,":[207],"2018)":[208],"architecture.":[213],"This":[214,254],"calculates":[216],"Discrete-Continuous":[218],"Transform":[220],"similar":[221],"but":[225],"also":[226],"optimizes":[227],"parameter":[230],"back-propagation.":[232],"WDnet":[234],"26%":[237],"7%":[239],"dataset.":[253],"shows":[255],"that":[256],"generalized":[258],"extracted":[261,267],"compared":[263],"only":[271],"most":[273],"important":[274],"relevant":[276],"frequency":[277],"regions":[278],"focused":[280],"upon.":[281]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
