{"id":"https://openalex.org/W2395019927","doi":"https://doi.org/10.21437/interspeech.2014-317","title":"Unsupervised language filtering using the latent dirichlet allocation","display_name":"Unsupervised language filtering using the latent dirichlet allocation","publication_year":2014,"publication_date":"2014-09-14","ids":{"openalex":"https://openalex.org/W2395019927","doi":"https://doi.org/10.21437/interspeech.2014-317","mag":"2395019927"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2014-317","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-317","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084488961","display_name":"Robert A. Clark","orcid":"https://orcid.org/0000-0002-4892-3619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert A. J. Clark","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5011147842","display_name":"Yongyuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yongyuan Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.22065142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1268","last_page":"1272"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.8507499098777771},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8363096714019775},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.7097330093383789},{"id":"https://openalex.org/keywords/hierarchical-dirichlet-process","display_name":"Hierarchical Dirichlet process","score":0.6844142079353333},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6736537218093872},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6506422162055969},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6170933246612549},{"id":"https://openalex.org/keywords/gibbs-sampling","display_name":"Gibbs sampling","score":0.5930763483047485},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5489166975021362},{"id":"https://openalex.org/keywords/dirichlet-distribution","display_name":"Dirichlet distribution","score":0.5188032984733582},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.4993593692779541},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.4680853486061096},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.4438866674900055},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.41506925225257874},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.3987162113189697},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.30525946617126465},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.14414721727371216},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10758638381958008}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.8507499098777771},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8363096714019775},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.7097330093383789},{"id":"https://openalex.org/C141318989","wikidata":"https://www.wikidata.org/wiki/Q5753066","display_name":"Hierarchical Dirichlet process","level":4,"score":0.6844142079353333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6736537218093872},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6506422162055969},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6170933246612549},{"id":"https://openalex.org/C158424031","wikidata":"https://www.wikidata.org/wiki/Q1191905","display_name":"Gibbs sampling","level":3,"score":0.5930763483047485},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5489166975021362},{"id":"https://openalex.org/C169214877","wikidata":"https://www.wikidata.org/wiki/Q981016","display_name":"Dirichlet distribution","level":3,"score":0.5188032984733582},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.4993593692779541},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.4680853486061096},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.4438866674900055},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.41506925225257874},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.3987162113189697},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.30525946617126465},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.14414721727371216},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10758638381958008},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C182310444","wikidata":"https://www.wikidata.org/wiki/Q1332643","display_name":"Boundary value problem","level":2,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2014-317","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-317","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.720.2256","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.720.2256","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cstr.ed.ac.uk/downloads/publications/2014/zhang2014.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W80999043","https://openalex.org/W87844232","https://openalex.org/W1533946607","https://openalex.org/W1542720960","https://openalex.org/W1585280831","https://openalex.org/W1753184066","https://openalex.org/W1753361524","https://openalex.org/W1880262756","https://openalex.org/W1951381097","https://openalex.org/W2001082470","https://openalex.org/W2014516359","https://openalex.org/W2036822660","https://openalex.org/W2054658115","https://openalex.org/W2081748579","https://openalex.org/W2087937280","https://openalex.org/W2106403442","https://openalex.org/W2116137244","https://openalex.org/W2118034653","https://openalex.org/W2122888626","https://openalex.org/W2123660869","https://openalex.org/W2141476920","https://openalex.org/W2144100511","https://openalex.org/W2144430322","https://openalex.org/W2147986626","https://openalex.org/W2151773777","https://openalex.org/W2159544173","https://openalex.org/W2161353674","https://openalex.org/W2169200297","https://openalex.org/W2181715383","https://openalex.org/W2533388221"],"related_works":["https://openalex.org/W2048766621","https://openalex.org/W2939843948","https://openalex.org/W2803512450","https://openalex.org/W2097627380","https://openalex.org/W2053909857","https://openalex.org/W2008338582","https://openalex.org/W2162057505","https://openalex.org/W2185463984","https://openalex.org/W1999586157","https://openalex.org/W2352674739"],"abstract_inverted_index":{"To":[0],"automatically":[1],"build":[2],"from":[3,29],"scratch":[4],"the":[5,50,54,57,79,100,128],"language":[6,17,68,101,114,130],"processing":[7],"component":[8],"for":[9,99],"a":[10,15,18,61,121,132],"speech":[11],"synthesis":[12],"system":[13],"in":[14,53,131],"new":[16],"purified":[19],"text":[20],"corpora":[21],"is":[22,44,60,97,108,123],"needed":[23],"where":[24,42,76],"any":[25,86],"words":[26],"and":[27,41,104,134],"phrases":[28],"other":[30,137],"languages":[31,138],"are":[32],"clearly":[33],"identified":[34],"or":[35,89],"excluded.":[36],"When":[37],"using":[38],"found":[39],"data":[40,59],"there":[43],"no":[45],"inherent":[46],"linguistic":[47],"knowledge":[48],"of":[49,126],"language/languages":[51],"contained":[52],"data,":[55],"identifying":[56,127],"pure":[58],"difficult":[62],"problem.":[63],"We":[64,117],"propose":[65],"an":[66,112],"unsupervised":[67,113],"identification":[69,102],"ap-proach":[70],"based":[71],"on":[72],"Latent":[73,92],"Dirichlet":[74,93],"Allocation":[75,94],"we":[77],"take":[78],"raw":[80],"n-gram":[81],"count":[82],"as":[83],"features":[84],"without":[85],"smoothing,":[87],"pruning":[88],"interpolation.":[90],"The":[91],"topic":[95],"model":[96,122],"reformulated":[98],"task":[103],"Collapsed":[105],"Gibbs":[106],"Sampling":[107],"used":[109],"to":[110],"train":[111],"iden-tification":[115],"model.":[116],"show":[118],"that":[119],"such":[120],"highly":[124],"capable":[125],"primary":[129],"corpus":[133],"filtering":[135],"out":[136],"present.":[139]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
