{"id":"https://openalex.org/W2962922562","doi":"https://doi.org/10.21437/odyssey.2018-14","title":"Convolutional Neural Network and Language Embeddings for End-to-End Dialect Recognition","display_name":"Convolutional Neural Network and Language Embeddings for End-to-End Dialect Recognition","publication_year":2018,"publication_date":"2018-06-06","ids":{"openalex":"https://openalex.org/W2962922562","doi":"https://doi.org/10.21437/odyssey.2018-14","mag":"2962922562"},"language":"en","primary_location":{"id":"doi:10.21437/odyssey.2018-14","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2018-14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2018)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020263683","display_name":"Suwon Shon","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Suwon Shon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693435","display_name":"Ahmed Ali","orcid":"https://orcid.org/0000-0002-9186-7544"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed Ali","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5112758056","display_name":"James Glass","orcid":"https://orcid.org/0000-0002-3097-360X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"James Glass","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020263683"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.1669,"has_fulltext":false,"cited_by_count":70,"citation_normalized_percentile":{"value":0.97627312,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"98","last_page":"104"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11640","display_name":"Linguistic Variation and Morphology","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.784383237361908},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.673758327960968},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6060613393783569},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5920061469078064},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5778155326843262},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5440059304237366},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5412594079971313},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.4906803369522095},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.48392564058303833},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4790036380290985},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.46903881430625916},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4492449164390564},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.44178637862205505},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.43055206537246704},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4267960786819458},{"id":"https://openalex.org/keywords/arabic-numerals","display_name":"Arabic numerals","score":0.41736772656440735},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39951303601264954},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3690190315246582},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.19015970826148987},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0867224931716919}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.784383237361908},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.673758327960968},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6060613393783569},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5920061469078064},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5778155326843262},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5440059304237366},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5412594079971313},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.4906803369522095},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.48392564058303833},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4790036380290985},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.46903881430625916},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4492449164390564},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.44178637862205505},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.43055206537246704},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4267960786819458},{"id":"https://openalex.org/C82054205","wikidata":"https://www.wikidata.org/wiki/Q29961325","display_name":"Arabic numerals","level":2,"score":0.41736772656440735},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39951303601264954},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3690190315246582},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.19015970826148987},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0867224931716919},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/odyssey.2018-14","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2018-14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2018)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2766680336","https://openalex.org/W2133320490","https://openalex.org/W4289830142","https://openalex.org/W1497065097","https://openalex.org/W3150393935","https://openalex.org/W2125446021","https://openalex.org/W2980055100","https://openalex.org/W2186790562","https://openalex.org/W2548564146","https://openalex.org/W2756038079"],"abstract_inverted_index":{"Dialect":[0],"identification":[1,10],"(DID)":[2],"is":[3],"a":[4,13,34,91,136,144,200,217],"special":[5],"case":[6],"of":[7,72,159],"general":[8],"language":[9,40,193],"(LID),":[11],"but":[12],"more":[14],"challenging":[15],"problem":[16],"due":[17],"to":[18,38,95,181,185],"the":[19,50,54,116,153,186,189,203,207],"linguistic":[20,47,104],"similarity":[21],"between":[22,113],"dialects.":[23,161],"In":[24],"this":[25],"paper,":[26],"we":[27,121],"propose":[28],"an":[29],"end-to-end":[30,64,204],"DID":[31,51,65,130,205],"system":[32,66,134,146],"and":[33,46,85,111],"Siamese":[35,117,190],"neural":[36],"network":[37,191],"extract":[39],"embeddings.":[41],"We":[42,88],"use":[43],"both":[44],"acoustic":[45,73],"features":[48,149,168],"for":[49],"task":[52],"on":[53,108,152],"Arabic":[55],"dialectal":[56],"speech":[57],"dataset:":[58],"Multi-Genre":[59],"Broadcast":[60],"3":[61],"(MGB-3).":[62],"The":[63,132,162],"was":[67],"trained":[68],"using":[69,115,135,147],"three":[70],"kinds":[71],"features:":[74],"Mel-Frequency":[75],"Cepstral":[76],"Coefficients":[77],"(MFCCs),":[78],"log":[79],"Mel-scale":[80],"Filter":[81],"Bank":[82],"energies":[83],"(FBANK)":[84],"spectrogram":[86],"energies.":[87],"also":[89],"investigated":[90],"dataset":[92],"augmentation":[93,176],"approach":[94],"achieve":[96,169,197],"robust":[97],"performance":[98],"with":[99,192],"limited":[100],"data":[101],"resources.":[102],"Our":[103],"feature":[105,124,138],"research":[106],"focused":[107],"learning":[109],"similarities":[110],"dissimilarities":[112],"dialects":[114],"network,":[118],"so":[119],"that":[120,166],"can":[122],"reduce":[123],"dimensionality":[125],"as":[126,128,198,202],"well":[127],"improve":[129],"performance.":[131],"best":[133],"single":[137],"set":[139,157],"achieves":[140],"73%":[141],"accuracy,":[142],"while":[143],"fusion":[145],"multiple":[148],"yields":[150],"78%":[151],"MGB-3":[154],"dialect":[155],"test":[156],"consisting":[158],"5":[160],"experimental":[163],"results":[164,172],"indicate":[165],"FBANK":[167],"slightly":[170],"better":[171],"than":[173],"MFCCs.":[174],"Dataset":[175],"via":[177],"speed":[178],"perturbation":[179],"appears":[180],"add":[182],"significant":[183],"robustness":[184],"system.":[187,219],"Although":[188],"embeddings":[194],"did":[195],"not":[196],"good":[199,211],"result":[201],"system,":[206],"two":[208],"approaches":[209],"had":[210],"synergy":[212],"when":[213],"combined":[214],"together":[215],"in":[216],"fused":[218]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":8}],"updated_date":"2026-01-21T23:30:37.877113","created_date":"2025-10-10T00:00:00"}
