{"id":"https://openalex.org/W3094075253","doi":"https://doi.org/10.1145/3443279.3443309","title":"Gender Prediction Based on Vietnamese Names with Machine Learning Techniques","display_name":"Gender Prediction Based on Vietnamese Names with Machine Learning Techniques","publication_year":2020,"publication_date":"2020-12-18","ids":{"openalex":"https://openalex.org/W3094075253","doi":"https://doi.org/10.1145/3443279.3443309","mag":"3094075253"},"language":"en","primary_location":{"id":"doi:10.1145/3443279.3443309","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3443279.3443309","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th International Conference on Natural Language Processing and Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2010.10852","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Huy Quoc To","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Huy Quoc To","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kiet Van Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Kiet Van Nguyen","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ngan Luu-Thuy Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Ngan Luu-Thuy Nguyen","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"last","author":{"id":null,"display_name":"Anh Gia-Tuan Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Anh Gia-Tuan Nguyen","raw_affiliation_strings":["University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Information Technology, Ho Chi Minh City, Vietnam Vietnam National University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I123565023"],"apc_list":null,"apc_paid":null,"fwci":0.8228,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.79526887,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"55","last_page":"60"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12970","display_name":"Names, Identity, and Discrimination Research","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.953000009059906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.871999979019165},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5781000256538391},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.45010000467300415},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.44359999895095825},{"id":"https://openalex.org/keywords/multinomial-logistic-regression","display_name":"Multinomial logistic regression","score":0.4375999867916107},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.3474000096321106},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.33320000767707825},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3305000066757202}],"concepts":[{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.871999979019165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7900000214576721},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6929000020027161},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6561999917030334},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5781000256538391},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5778999924659729},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.45010000467300415},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.44359999895095825},{"id":"https://openalex.org/C117568660","wikidata":"https://www.wikidata.org/wiki/Q1650843","display_name":"Multinomial logistic regression","level":2,"score":0.4375999867916107},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C152361515","wikidata":"https://www.wikidata.org/wiki/Q181328","display_name":"Bernoulli's principle","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.2761000096797943},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.27160000801086426}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3443279.3443309","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3443279.3443309","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th International Conference on Natural Language Processing and Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2010.10852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.10852","pdf_url":"https://arxiv.org/pdf/2010.10852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2010.10852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.10852","pdf_url":"https://arxiv.org/pdf/2010.10852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1005303548","https://openalex.org/W1493036841","https://openalex.org/W1988906723","https://openalex.org/W2113268290","https://openalex.org/W2124549482","https://openalex.org/W2165428648","https://openalex.org/W2571845441","https://openalex.org/W2785912406","https://openalex.org/W2950437998","https://openalex.org/W2964992537","https://openalex.org/W2982521860"],"related_works":[],"abstract_inverted_index":{"As":[0,132],"biological":[1],"gender":[2,18,50,112],"is":[3,68,142],"one":[4],"of":[5,8,125],"the":[6,123,135],"aspects":[7],"presenting":[9],"individual":[10],"human,":[11],"much":[12],"work":[13],"has":[14],"been":[15,36],"done":[16,39],"on":[17,21,53,70,114,129,146,156],"classification":[19],"based":[20,52,155],"people":[22],"names.":[23,55,116],"The":[24],"proposals":[25],"for":[26,40,49,73,111],"English":[27],"and":[28,98,101,121,149],"Chinese":[29],"languages":[30],"are":[31],"tremendous;":[32],"still,":[33],"there":[34],"have":[35,140],"few":[37],"works":[38],"Vietnamese":[41,54,115],"so":[42],"far.":[43],"We":[44,117],"propose":[45],"a":[46,102,119,133,152],"new":[47],"dataset":[48,57,67,120],"prediction":[51,113],"This":[56,66],"comprises":[58],"over":[59],"26,000":[60],"full":[61],"names":[62],"annotated":[63],"with":[64,107],"genders.":[65],"available":[69],"our":[71,157],"website":[72],"research":[74],"purposes.":[75],"In":[76],"addition,":[77],"this":[78],"paper":[79],"describes":[80],"six":[81],"machine":[82],"learning":[83,104],"algorithms":[84],"(Support":[85],"Vector":[86],"Machine,":[87],"Multinomial":[88],"Naive":[89,92],"Bayes,":[90,93],"Bernoulli":[91],"Decision":[94],"Tree,":[95],"Random":[96],"Forrest":[97],"Logistic":[99],"Regression)":[100],"deep":[103],"model":[105,148],"(LSTM)":[106],"fastText":[108],"word":[109],"embedding":[110],"create":[118],"investigate":[122],"impact":[124],"each":[126],"name":[127],"component":[128],"detecting":[130],"gender.":[131],"result,":[134],"best":[136],"F1-score":[137],"that":[138],"we":[139,150],"achieved":[141],"up":[143],"to":[144],"96%":[145],"LSTM":[147],"generate":[151],"web":[153],"API":[154],"trained":[158],"model.":[159]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-10-29T00:00:00"}
