{"id":"https://openalex.org/W3015579786","doi":"https://doi.org/10.1109/icassp40776.2020.9053954","title":"E2E-SINCNET: Toward Fully End-To-End Speech Recognition","display_name":"E2E-SINCNET: Toward Fully End-To-End Speech Recognition","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015579786","doi":"https://doi.org/10.1109/icassp40776.2020.9053954","mag":"3015579786"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089505434","display_name":"Titouan Parcollet","orcid":"https://orcid.org/0000-0003-0672-1346"},"institutions":[{"id":"https://openalex.org/I198415970","display_name":"Universit\u00e9 d'Avignon et des Pays de Vaucluse","ror":"https://ror.org/00mfpxb84","country_code":"FR","type":"education","lineage":["https://openalex.org/I198415970"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Titouan Parcollet","raw_affiliation_strings":["Avignon Universit\u00e9, France"],"affiliations":[{"raw_affiliation_string":"Avignon Universit\u00e9, France","institution_ids":["https://openalex.org/I198415970"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068392446","display_name":"Mohamed Morchid","orcid":"https://orcid.org/0000-0002-4427-2468"},"institutions":[{"id":"https://openalex.org/I198415970","display_name":"Universit\u00e9 d'Avignon et des Pays de Vaucluse","ror":"https://ror.org/00mfpxb84","country_code":"FR","type":"education","lineage":["https://openalex.org/I198415970"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mohamed Morchid","raw_affiliation_strings":["Avignon Universit\u00e9, France"],"affiliations":[{"raw_affiliation_string":"Avignon Universit\u00e9, France","institution_ids":["https://openalex.org/I198415970"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050508708","display_name":"Georges Linar\u00e8s","orcid":"https://orcid.org/0000-0001-8049-9056"},"institutions":[{"id":"https://openalex.org/I198415970","display_name":"Universit\u00e9 d'Avignon et des Pays de Vaucluse","ror":"https://ror.org/00mfpxb84","country_code":"FR","type":"education","lineage":["https://openalex.org/I198415970"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Georges Linares","raw_affiliation_strings":["Avignon Universit\u00e9, France"],"affiliations":[{"raw_affiliation_string":"Avignon Universit\u00e9, France","institution_ids":["https://openalex.org/I198415970"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5089505434"],"corresponding_institution_ids":["https://openalex.org/I198415970"],"apc_list":null,"apc_paid":null,"fwci":3.5791,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.94006464,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7714","last_page":"7718"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.789096474647522},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7474451065063477},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.7007118463516235},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6117244958877563},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5714573860168457},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5162815451622009},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4836793839931488},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.481020987033844},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4706263542175293},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.46954044699668884},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3929820656776428},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3349092900753021},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3034190833568573},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.2703841030597687},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.17379948496818542},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07556930184364319}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.789096474647522},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7474451065063477},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.7007118463516235},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6117244958877563},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5714573860168457},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5162815451622009},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4836793839931488},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.481020987033844},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4706263542175293},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.46954044699668884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3929820656776428},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3349092900753021},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3034190833568573},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2703841030597687},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.17379948496818542},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07556930184364319},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W854541894","https://openalex.org/W1524333225","https://openalex.org/W1535749512","https://openalex.org/W1542280630","https://openalex.org/W1604034532","https://openalex.org/W1634752325","https://openalex.org/W1635512741","https://openalex.org/W1902237438","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2408093180","https://openalex.org/W2526425061","https://openalex.org/W2627092829","https://openalex.org/W2892009249","https://openalex.org/W2901616798","https://openalex.org/W2903799412","https://openalex.org/W2911291251","https://openalex.org/W2962759037","https://openalex.org/W2962780374","https://openalex.org/W2962826786","https://openalex.org/W2963071736","https://openalex.org/W2963403664","https://openalex.org/W2964052309","https://openalex.org/W2964227577","https://openalex.org/W2972389417","https://openalex.org/W2973053574","https://openalex.org/W2973215447","https://openalex.org/W3008912312","https://openalex.org/W6600284362","https://openalex.org/W6623517193","https://openalex.org/W6631362777","https://openalex.org/W6636687593","https://openalex.org/W6675365184","https://openalex.org/W6713548365","https://openalex.org/W6725896364","https://openalex.org/W6756859573","https://openalex.org/W6757084628"],"related_works":["https://openalex.org/W1494724239","https://openalex.org/W3081187864","https://openalex.org/W151018310","https://openalex.org/W4380605396","https://openalex.org/W2916997151","https://openalex.org/W3198455051","https://openalex.org/W3133352777","https://openalex.org/W2949174760","https://openalex.org/W2008737763","https://openalex.org/W2584084702"],"abstract_inverted_index":{"Modern":[0],"end-to-end":[1],"(E2E)":[2],"Automatic":[3],"Speech":[4],"Recognition":[5],"(ASR)":[6],"systems":[7,110],"rely":[8],"on":[9,18,96,113,131],"Deep":[10],"Neural":[11],"Networks":[12],"(DNN)":[13],"that":[14,69,103],"are":[15,42],"mostly":[16],"trained":[17],"handcrafted":[19],"and":[20,32,83,87],"pre-computed":[21,118],"acoustic":[22,119],"features":[23],"such":[24],"as":[25],"Mel-filter-banks":[26],"or":[27,117],"Mel-frequency":[28],"cepstral":[29],"coefficients.":[30],"Nonetheless,":[31],"despite":[33],"worse":[34],"performances,":[35],"E2E":[36,66,109],"ASR":[37,67],"models":[38],"processing":[39],"raw":[40,73,115],"waveforms":[41],"an":[43],"active":[44],"research":[45],"field":[46],"due":[47],"to":[48,75],"the":[49,53,61,72,76,88,114,132],"lossless":[50],"nature":[51],"of":[52,129],"input":[54],"signal.":[55],"In":[56],"this":[57],"paper,":[58],"we":[59],"propose":[60],"E2E-SincNet,":[62],"a":[63,122],"novel":[64],"fully":[65],"model":[68],"goes":[70],"from":[71],"waveform":[74,116],"text":[77],"transcripts":[78],"by":[79],"merging":[80],"two":[81,97],"recent":[82],"powerful":[84],"paradigms:":[85],"SincNet":[86],"joint":[89],"CTC-attention":[90],"training":[91],"scheme.":[92],"The":[93],"conducted":[94],"experiments":[95],"different":[98],"speech":[99],"recognition":[100],"tasks":[101],"show":[102],"our":[104],"approach":[105],"outperforms":[106],"previously":[107],"investigated":[108],"relying":[111],"either":[112],"features,":[120],"with":[121],"reported":[123],"top-of-the-line":[124],"Word":[125],"Error":[126],"Rate":[127],"(WER)":[128],"4.7%":[130],"Wall":[133],"Street":[134],"Journal":[135],"(WSJ)":[136],"dataset.":[137]},"counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
