{"id":"https://openalex.org/W2996814786","doi":"https://doi.org/10.3390/info11010024","title":"Punctuation and Parallel Corpus Based Word Embedding Model for Low-Resource Languages","display_name":"Punctuation and Parallel Corpus Based Word Embedding Model for Low-Resource Languages","publication_year":2019,"publication_date":"2019-12-29","ids":{"openalex":"https://openalex.org/W2996814786","doi":"https://doi.org/10.3390/info11010024","mag":"2996814786"},"language":"en","primary_location":{"id":"doi:10.3390/info11010024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info11010024","pdf_url":"https://www.mdpi.com/2078-2489/11/1/24/pdf?version=1577615588","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/11/1/24/pdf?version=1577615588","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101869268","display_name":"Yang Yuan","orcid":"https://orcid.org/0000-0002-0849-1137"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210106108","display_name":"Xinjiang Technical Institute of Physics & Chemistry","ror":"https://ror.org/00x44h034","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210106108"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yuan","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing 100049, China","Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China"],"raw_orcid":"https://orcid.org/0000-0002-0849-1137","affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing 100049, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","institution_ids":[]},{"raw_affiliation_string":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China","institution_ids":["https://openalex.org/I4210106108","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100654885","display_name":"Li Xiao","orcid":"https://orcid.org/0000-0002-3063-0869"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210106108","display_name":"Xinjiang Technical Institute of Physics & Chemistry","ror":"https://ror.org/00x44h034","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210106108"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiao Li","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing 100049, China","Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing 100049, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","institution_ids":[]},{"raw_affiliation_string":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China","institution_ids":["https://openalex.org/I4210106108","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034313244","display_name":"Yating Yang","orcid":"https://orcid.org/0000-0002-2639-3944"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210106108","display_name":"Xinjiang Technical Institute of Physics & Chemistry","ror":"https://ror.org/00x44h034","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210106108"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ya-Ting Yang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing 100049, China","Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing 100049, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Xinjiang Laboratory of Minority Speech and Language Information Processing, Urumqi 830011, China","institution_ids":[]},{"raw_affiliation_string":"Xinjiang Technical Institute of Physics &amp; Chemistry, Chinese Academy of Sciences, Urumqi 830011, China","institution_ids":["https://openalex.org/I4210106108","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100654885"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210106108","https://openalex.org/I4210165038"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17036236,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"1","first_page":"24","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7737530469894409},{"id":"https://openalex.org/keywords/punctuation","display_name":"Punctuation","score":0.7692657709121704},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7272447347640991},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6577855348587036},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.6220910549163818},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.5909364223480225},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5908117294311523},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5174791812896729},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4741646349430084},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4639759659767151},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.46196526288986206},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4000224471092224},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19118723273277283}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7737530469894409},{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.7692657709121704},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7272447347640991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6577855348587036},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.6220910549163818},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.5909364223480225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5908117294311523},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5174791812896729},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4741646349430084},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4639759659767151},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.46196526288986206},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4000224471092224},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19118723273277283},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/info11010024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info11010024","pdf_url":"https://www.mdpi.com/2078-2489/11/1/24/pdf?version=1577615588","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b3d604c25804439c81b52648e2be13c3","is_oa":true,"landing_page_url":"https://doaj.org/article/b3d604c25804439c81b52648e2be13c3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 11, Iss 1, p 24 (2019)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/11/1/24/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/info11010024","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/info11010024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info11010024","pdf_url":"https://www.mdpi.com/2078-2489/11/1/24/pdf?version=1577615588","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1043054566","display_name":null,"funder_award_id":"2017-XBQNXZ-A-005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2109548048","display_name":null,"funder_award_id":"Y839031201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2491706362","display_name":null,"funder_award_id":"2017-XBQNXZ-A-005","funder_id":"https://openalex.org/F4320328015","funder_display_name":"West Light Foundation of the Chinese Academy of Sciences"},{"id":"https://openalex.org/G4801340082","display_name":null,"funder_award_id":"2017472","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G5413428804","display_name":null,"funder_award_id":"2017472","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5914510038","display_name":null,"funder_award_id":"U1703133","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G840390215","display_name":null,"funder_award_id":"2016A03007-3","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8428233225","display_name":null,"funder_award_id":"2017472","funder_id":"https://openalex.org/F4320322847","funder_display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"},{"id":"https://openalex.org/F4320328015","display_name":"West Light Foundation of the Chinese Academy of Sciences","ror":null},{"id":"https://openalex.org/F4320335892","display_name":"Youth Innovation Promotion Association","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2996814786.pdf","grobid_xml":"https://content.openalex.org/works/W2996814786.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W183201499","https://openalex.org/W1614298861","https://openalex.org/W1662133657","https://openalex.org/W1836521361","https://openalex.org/W1978400666","https://openalex.org/W2053921957","https://openalex.org/W2080100102","https://openalex.org/W2080373976","https://openalex.org/W2103318667","https://openalex.org/W2116343275","https://openalex.org/W2125076245","https://openalex.org/W2131462252","https://openalex.org/W2138204974","https://openalex.org/W2147152072","https://openalex.org/W2153579005","https://openalex.org/W2158899491","https://openalex.org/W2164019165","https://openalex.org/W2168185617","https://openalex.org/W2250539671","https://openalex.org/W2250683455","https://openalex.org/W2251012068","https://openalex.org/W2251771443","https://openalex.org/W2293759818","https://openalex.org/W2513522215","https://openalex.org/W2769566324","https://openalex.org/W2787560479","https://openalex.org/W2798908575","https://openalex.org/W2799410579","https://openalex.org/W2896457183","https://openalex.org/W2940008433","https://openalex.org/W2950133940","https://openalex.org/W2952230511","https://openalex.org/W2962739339","https://openalex.org/W2964005834","https://openalex.org/W2964266061","https://openalex.org/W2970049541","https://openalex.org/W2998704965","https://openalex.org/W3216404684","https://openalex.org/W4285719527","https://openalex.org/W6677674009","https://openalex.org/W6680106237","https://openalex.org/W6680532216","https://openalex.org/W6683738474","https://openalex.org/W6759455113"],"related_works":["https://openalex.org/W2946409105","https://openalex.org/W3152932816","https://openalex.org/W2985392712","https://openalex.org/W4388996947","https://openalex.org/W2798009317","https://openalex.org/W4382201653","https://openalex.org/W3203949288","https://openalex.org/W3133567596","https://openalex.org/W4312127319","https://openalex.org/W2998070955"],"abstract_inverted_index":{"To":[0],"overcome":[1],"the":[2,27,33,42,48,76,87,100,106,112],"data":[3],"sparseness":[4],"in":[5,9,99,109],"word":[6,20,44,55,79,101,113],"embedding":[7,21,80],"trained":[8],"low-resource":[10,82],"languages,":[11],"we":[12,25],"propose":[13],"a":[14],"punctuation":[15],"and":[16,38,71,104],"parallel":[17,51],"corpus":[18,52],"based":[19],"model.":[22],"In":[23],"particular,":[24],"generate":[26],"global":[28],"word-pair":[29],"co-occurrence":[30],"matrix":[31],"with":[32,41,62],"punctuation-based":[34],"distance":[35],"attenuation":[36],"function,":[37],"integrate":[39],"it":[40],"intermediate":[43],"vectors":[45],"generated":[46],"from":[47],"small-scale":[49],"bilingual":[50],"to":[53],"train":[54],"embedding.":[56],"Experimental":[57],"results":[58,108],"show":[59],"that":[60],"compared":[61],"several":[63],"widely":[64],"used":[65],"baseline":[66],"models":[67],"such":[68],"as":[69],"GloVe":[70],"Word2vec,":[72],"our":[73,91],"model":[74,92],"improves":[75],"performance":[77],"of":[78,111],"for":[81],"language":[83],"significantly.":[84],"Trained":[85],"on":[86],"restricted-scale":[88],"English-Chinese":[89],"corpus,":[90],"has":[93],"improved":[94],"by":[95],"0.71":[96],"percentage":[97],"points":[98],"analogy":[102],"task,":[103],"achieved":[105],"best":[107],"all":[110],"similarity":[114],"tasks.":[115]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
