{"id":"https://openalex.org/W1520886244","doi":"https://doi.org/10.21437/eurospeech.2003-779","title":"Using the web for fast language model construction in minority languages","display_name":"Using the web for fast language model construction in minority languages","publication_year":2003,"publication_date":"2003-09-01","ids":{"openalex":"https://openalex.org/W1520886244","doi":"https://doi.org/10.21437/eurospeech.2003-779","mag":"1520886244"},"language":"en","primary_location":{"id":"doi:10.21437/eurospeech.2003-779","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.2003-779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"8th European Conference on Speech Communication and Technology (Eurospeech 2003)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01392377","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110159187","display_name":"Viet Bac Le","orcid":"https://orcid.org/0000-0002-7267-6750"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Viet Bac Le","raw_affiliation_strings":["\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041939827","display_name":"Brigitte Bigi","orcid":"https://orcid.org/0000-0003-1834-6918"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Brigitte Bigi","raw_affiliation_strings":["\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040820339","display_name":"Laurent Besacier","orcid":"https://orcid.org/0000-0001-7411-9125"},"institutions":[{"id":"https://openalex.org/I4210134356","display_name":"Laboratoire G\u00e9nie Industriel","ror":"https://ror.org/0455wwj08","country_code":"FR","type":"facility","lineage":["https://openalex.org/I277688954","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210134356"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Laurent Besacier","raw_affiliation_strings":["Environnements et outils pour le G\u00e9nie Logiciel Industriel"],"affiliations":[{"raw_affiliation_string":"Environnements et outils pour le G\u00e9nie Logiciel Industriel","institution_ids":["https://openalex.org/I4210134356"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038215006","display_name":"Eric Castelli","orcid":"https://orcid.org/0000-0003-2978-2619"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Eric Castelli","raw_affiliation_strings":["\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"\u00a3 CLIPS-IMAG Laboratory, UMR CNRS 5524 BP 53, 38041 Grenoble Cedex 9, France \u00a3\u00a3 MICA Center, 1 Dai Co Viet, Hanoi, Vietnam","institution_ids":["https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110159187"],"corresponding_institution_ids":["https://openalex.org/I1294671590"],"apc_list":null,"apc_paid":null,"fwci":1.8719,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.86928524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3117","last_page":"3120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.876465916633606},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.6669267416000366},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6392204761505127},{"id":"https://openalex.org/keywords/minority-language","display_name":"Minority language","score":0.5829309225082397},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5600179433822632},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5436538457870483},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.5360178351402283},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5047973394393921},{"id":"https://openalex.org/keywords/constructed-language","display_name":"Constructed language","score":0.4688359797000885},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4683510661125183},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.43488267064094543},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.24902737140655518},{"id":"https://openalex.org/keywords/comprehension-approach","display_name":"Comprehension approach","score":0.20142605900764465},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.18106117844581604}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.876465916633606},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.6669267416000366},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6392204761505127},{"id":"https://openalex.org/C2776847045","wikidata":"https://www.wikidata.org/wiki/Q61566","display_name":"Minority language","level":2,"score":0.5829309225082397},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5600179433822632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5436538457870483},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.5360178351402283},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5047973394393921},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.4688359797000885},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4683510661125183},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.43488267064094543},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.24902737140655518},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.20142605900764465},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.18106117844581604},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.21437/eurospeech.2003-779","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.2003-779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"8th European Conference on Speech Communication and Technology (Eurospeech 2003)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.142.6241","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.142.6241","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-clips.imag.fr/geod/User/viet-bac.le/Publications/levb2003eurospeech.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.142.8514","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.142.8514","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-clips.imag.fr/geod/User/viet-bac.le/outils/levb03eurospeech.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.60.405","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.60.405","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-clips.imag.fr/geod/User/viet-bac.le/Publications/levb2003eurospeech.ps","raw_type":"text"},{"id":"pmh:oai:HAL:hal-01392377v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01392377","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Eurospeech, 2003, Geneva, Switzerland. pp.3117--3120","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01392377v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01392377","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Eurospeech, 2003, Geneva, Switzerland. pp.3117--3120","raw_type":"Conference papers"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W132275670","https://openalex.org/W207483639","https://openalex.org/W1562082452","https://openalex.org/W1631260214","https://openalex.org/W1996903695","https://openalex.org/W2033436836","https://openalex.org/W2090146924","https://openalex.org/W2145489946","https://openalex.org/W2160080653","https://openalex.org/W3017461453"],"related_works":["https://openalex.org/W3213549959","https://openalex.org/W3129739276","https://openalex.org/W2183673572","https://openalex.org/W3075498906","https://openalex.org/W2913520953","https://openalex.org/W4310801723","https://openalex.org/W4387896144","https://openalex.org/W2515884433","https://openalex.org/W3087963919","https://openalex.org/W2808455255"],"abstract_inverted_index":{"The":[0],"design":[1],"and":[2,59,74,89],"construction":[3,42,76],"of":[4,54,77,87,99],"a":[5,12,20,35,72,78,82,114,123,127],"language":[6,21,40,79,102,106,116,125,129],"model":[7,41,80,130],"for":[8,27,38,122],"minority":[9,16,44,124],"languages":[10,45],"is":[11,46,49],"hard":[13],"task.":[14],"By":[15,64],"language,":[17],"we":[18],"mean":[19],"with":[22,81,136],"small":[23,83],"available":[24],"resources,":[25],"especially":[26],"the":[28,52,100,109,119],"statistical":[29],"learning":[30],"problem.":[31],"In":[32],"this":[33,69],"paper,":[34],"new":[36,139],"methodology":[37,70],"fast":[39],"in":[43,85,133],"proposed.":[47],"It":[48],"based":[50],"on":[51,113],"use":[53],"Web":[55,101],"resources":[56],"to":[57,141],"collect":[58],"make":[60],"efficient":[61,66,75],"textual":[62],"corpora.":[63],"using":[65,108],"filtering":[67,111,144],"techniques,":[68],"allows":[71],"quick":[73],"cost":[84],"term":[86],"computational":[88],"human":[90],"resources.":[91],"Our":[92],"primary":[93],"experiments":[94],"have":[95],"shown":[96],"excellent":[97],"performance":[98],"models":[103,107],"vs":[104],"newspaper":[105],"proposed":[110],"methods":[112],"majority":[115],"(French).":[117],"Following":[118],"same":[120],"way":[121],"(Vietnamese),":[126],"valuable":[128],"was":[131],"constructed":[132],"3":[134],"month":[135],"only":[137],"15%":[138],"development":[140],"convert":[142],"some":[143],"tools.":[145]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
