{"id":"https://openalex.org/W2740489261","doi":"https://doi.org/10.18653/v1/w17-1317","title":"Toward a Web-based Speech Corpus for Algerian Dialectal Arabic Varieties","display_name":"Toward a Web-based Speech Corpus for Algerian Dialectal Arabic Varieties","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2740489261","doi":"https://doi.org/10.18653/v1/w17-1317","mag":"2740489261"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-1317","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1317","pdf_url":"https://www.aclweb.org/anthology/W17-1317.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-1317.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005569097","display_name":"Soumia Bougrine","orcid":null},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":true,"raw_author_name":"Soumia Bougrine","raw_affiliation_strings":["Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie"],"affiliations":[{"raw_affiliation_string":"Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","institution_ids":["https://openalex.org/I50219554"]},{"raw_affiliation_string":"Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie","institution_ids":["https://openalex.org/I50219554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029912490","display_name":"Aicha Chorana","orcid":null},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Aicha Chorana","raw_affiliation_strings":["Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie"],"affiliations":[{"raw_affiliation_string":"Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","institution_ids":["https://openalex.org/I50219554"]},{"raw_affiliation_string":"Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie","institution_ids":["https://openalex.org/I50219554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060968078","display_name":"Abdallah Lakhdari","orcid":"https://orcid.org/0000-0001-8005-1534"},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Abdallah Lakhdari","raw_affiliation_strings":["Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie"],"affiliations":[{"raw_affiliation_string":"Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","institution_ids":["https://openalex.org/I50219554"]},{"raw_affiliation_string":"Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie","institution_ids":["https://openalex.org/I50219554"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046969653","display_name":"Hadda Cherroun","orcid":"https://orcid.org/0000-0002-5117-0320"},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Hadda Cherroun","raw_affiliation_strings":["Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie"],"affiliations":[{"raw_affiliation_string":"Laboratoire d'informatique et Mathmatiques Universit Amar Telidji Laghouat, Algrie","institution_ids":["https://openalex.org/I50219554"]},{"raw_affiliation_string":"Laboratoire d'informatique et Math\u00e9matiques Universit\u00e9 Amar Telidji Laghouat, Alg\u00e9rie","institution_ids":["https://openalex.org/I50219554"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005569097"],"corresponding_institution_ids":["https://openalex.org/I50219554"],"apc_list":null,"apc_paid":null,"fwci":0.7801,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.79225229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"138","last_page":"146"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8007110357284546},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.6763318777084351},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6220051646232605},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5747665762901306},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.5372611880302429},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5343707203865051},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5074431300163269},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33898410201072693},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.32804787158966064},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2673078775405884}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8007110357284546},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.6763318777084351},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6220051646232605},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5747665762901306},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.5372611880302429},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5343707203865051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5074431300163269},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33898410201072693},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.32804787158966064},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2673078775405884},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w17-1317","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1317","pdf_url":"https://www.aclweb.org/anthology/W17-1317.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-1317","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1317","pdf_url":"https://www.aclweb.org/anthology/W17-1317.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2740489261.pdf","grobid_xml":"https://content.openalex.org/works/W2740489261.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W1526974435","https://openalex.org/W2010878541","https://openalex.org/W2021513757","https://openalex.org/W2104457544","https://openalex.org/W2152428980","https://openalex.org/W2153364078","https://openalex.org/W2184360169","https://openalex.org/W2231567078","https://openalex.org/W2329255937","https://openalex.org/W2342238868","https://openalex.org/W2399262366","https://openalex.org/W2620757702","https://openalex.org/W2947579628","https://openalex.org/W3127686677","https://openalex.org/W4212990215","https://openalex.org/W4247528608"],"related_works":["https://openalex.org/W231741463","https://openalex.org/W4200068392","https://openalex.org/W2772686614","https://openalex.org/W2152945827","https://openalex.org/W114226241","https://openalex.org/W2015513221","https://openalex.org/W301864623","https://openalex.org/W2757826346","https://openalex.org/W2184371793","https://openalex.org/W2349468610"],"abstract_inverted_index":{"The":[0,77],"success":[1],"of":[2,80,155],"machine":[3],"learning":[4],"for":[5,13,43],"automatic":[6,114],"speech":[7,111],"processing":[8],"has":[9],"raised":[10],"the":[11,126,132,138],"need":[12],"large":[14],"scale":[15],"datasets.":[16],"However,":[17],"collecting":[18],"such":[19],"data":[20],"is":[21],"often":[22],"a":[23,41],"challenging":[24],"task":[25],"as":[26],"it":[27],"implies":[28],"significant":[29],"investment":[30],"involving":[31],"time":[32],"and":[33,59,115,128,147],"money":[34],"cost.":[35],"In":[36,88,105],"this":[37,94],"paper,":[38],"we":[39,90,107],"devise":[40],"recipe":[42],"building":[44,66],"largescale":[45],"Speech":[46],"Corpora":[47],"by":[48,65],"harnessing":[49],"Web":[50],"resources":[51],"namely":[52],"YouTube,":[53],"other":[54],"Social":[55],"Media,":[56],"Online":[57],"Radio":[58],"TV.":[60],"We":[61],"illustrate":[62],"our":[63,81],"methodology":[64],"KALAM'DZ,":[67],"An":[68],"Arabic":[69,142],"Spoken":[70],"corpus":[71,136],"dedicated":[72],"to":[73,125],"Algerian":[74,86,141],"dialectal":[75],"varieties.":[76],"preliminary":[78],"version":[79],"dataset":[82],"covers":[83],"all":[84],"major":[85,140],"dialects.":[87],"addition,":[89],"make":[91],"sure":[92],"that":[93,101],"material":[95],"takes":[96],"into":[97],"account":[98],"numerous":[99],"aspects":[100],"foster":[102],"its":[103],"richness.":[104],"fact,":[106],"have":[108],"targeted":[109],"various":[110],"topics.":[112],"Some":[113],"manual":[116],"annotations":[117],"are":[118],"provided.":[119],"They":[120],"gather":[121],"useful":[122],"information":[123,130],"related":[124],"speakers":[127,146],"sub-dialect":[129],"at":[131,156],"utterance":[133],"level.":[134],"Our":[135],"encompasses":[137],"8":[139],"sub-dialects":[143],"with":[144],"4881":[145],"more":[148],"than":[149],"104.4":[150],"hours":[151],"segmented":[152],"in":[153],"utterances":[154],"least":[157],"6":[158],"s.":[159]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
