{"id":"https://openalex.org/W7126016593","doi":"https://doi.org/10.1145/3778265.3778272","title":"A Vietnamese Dataset for Text Segmentation and Multiple Choices Reading Comprehension","display_name":"A Vietnamese Dataset for Text Segmentation and Multiple Choices Reading Comprehension","publication_year":2025,"publication_date":"2025-10-29","ids":{"openalex":"https://openalex.org/W7126016593","doi":"https://doi.org/10.1145/3778265.3778272"},"language":null,"primary_location":{"id":"doi:10.1145/3778265.3778272","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3778265.3778272","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 7th International Conference on Big-data Service and Intelligent Computation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3778265.3778272","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Toan Hai Nguyen","orcid":"https://orcid.org/0009-0008-9483-7472"},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Toan Hai Nguyen","raw_affiliation_strings":["Institute for Artificial Intelligence, VNU University of Engineering and Technology, Hanoi, Vietnam"],"raw_orcid":"https://orcid.org/0009-0008-9483-7472","affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Duc Minh Do","orcid":"https://orcid.org/0009-0008-4116-4067"},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Duc Minh Do","raw_affiliation_strings":["Faculty of Information Technology, VNU University of Engineering and Technology, Hanoi, Vietnam"],"raw_orcid":"https://orcid.org/0009-0008-4116-4067","affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124253276","display_name":"Truong Xuan Quan","orcid":null},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Truong Xuan Quan","raw_affiliation_strings":["Faculty of Information Technology, VNU University of Engineering and Technology, Hanoi, Vietnam"],"raw_orcid":"https://orcid.org/0009-0001-4911-3394","affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ha Viet Nguyen","orcid":"https://orcid.org/0009-0005-5237-5036"},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Ha Viet Nguyen","raw_affiliation_strings":["Institute for Artificial Intelligence, VNU University of Engineering and Technology, Hanoi, Vietnam"],"raw_orcid":"https://orcid.org/0009-0005-5237-5036","affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I67868205"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.83098461,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"42","last_page":"50"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39329999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39329999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.33239999413490295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.10350000113248825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.8241000175476074},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7376000285148621},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6697999835014343},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.6140999794006348},{"id":"https://openalex.org/keywords/reading-comprehension","display_name":"Reading comprehension","score":0.5015000104904175},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4982999861240387},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.48249998688697815},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.4799000024795532}],"concepts":[{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.8241000175476074},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7810999751091003},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7562999725341797},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7376000285148621},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7281000018119812},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6697999835014343},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.6140999794006348},{"id":"https://openalex.org/C2778780117","wikidata":"https://www.wikidata.org/wiki/Q3269423","display_name":"Reading comprehension","level":3,"score":0.5015000104904175},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4982999861240387},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.48249998688697815},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.4799000024795532},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.27649998664855957},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3778265.3778272","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3778265.3778272","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 7th International Conference on Big-data Service and Intelligent Computation","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3778265.3778272","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3778265.3778272","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 7th International Conference on Big-data Service and Intelligent Computation","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8793522715568542,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1557074680","https://openalex.org/W1603526456","https://openalex.org/W2159083595","https://openalex.org/W2606964149","https://openalex.org/W2893316907","https://openalex.org/W2914694065","https://openalex.org/W2962716111","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W2964223283","https://openalex.org/W2970263339","https://openalex.org/W3035390927","https://openalex.org/W3035497479","https://openalex.org/W3098637735","https://openalex.org/W3113790969","https://openalex.org/W3118485687","https://openalex.org/W3127115863","https://openalex.org/W3207773588","https://openalex.org/W4286904065","https://openalex.org/W4386566462","https://openalex.org/W6948244744"],"related_works":[],"abstract_inverted_index":{"Vietnamese,":[0,117],"the":[1,37],"20th":[2],"most":[3],"spoken":[4],"language":[5,18],"with":[6,65],"over":[7],"102":[8],"million":[9],"native":[10],"speakers,":[11],"lacks":[12],"robust":[13],"resources":[14],"for":[15,55,116],"key":[16],"natural":[17],"processing":[19],"tasks":[20,115],"such":[21],"as":[22],"text":[23,56,102],"segmentation":[24,57,103],"and":[25,41,58,72,95],"machine":[26],"reading":[27],"comprehension":[28],"(MRC).":[29],"To":[30],"address":[31],"this":[32],"gap,":[33],"we":[34],"present":[35],"VSMRC,":[36],"Vietnamese":[38,48],"Text":[39],"Segmentation":[40],"Multiple-Choice":[42],"Reading":[43],"Comprehension":[44],"Dataset.":[45],"Sourced":[46],"from":[47],"Wikipedia,":[49],"our":[50],"dataset":[51],"includes":[52],"15,942":[53],"documents":[54],"16,347":[59],"synthetic":[60],"multiple-choice":[61],"question-answer":[62],"pairs":[63],"generated":[64],"human":[66],"quality":[67],"assurance,":[68],"ensuring":[69],"a":[70],"reliable":[71],"diverse":[73],"resource.":[74],"Experiments":[75],"show":[76],"that":[77,109],"mBERT":[78],"consistently":[79],"outperforms":[80],"monolingual":[81],"models":[82,111],"on":[83,91,101],"both":[84],"tasks,":[85],"achieving":[86],"an":[87,96],"accuracy":[88],"of":[89,99],"88.01%":[90],"MRC":[92],"test":[93,104],"set":[94],"F1":[97],"score":[98],"63.15%":[100],"set.":[105],"Our":[106],"analysis":[107],"reveals":[108],"multilingual":[110],"excel":[112],"in":[113],"NLP":[114],"suggesting":[118],"potential":[119],"applications":[120],"to":[121],"other":[122],"under-resourced":[123],"languages.":[124],"VSMRC":[125],"is":[126],"available":[127],"at":[128],"HuggingFace1.":[129]},"counts_by_year":[],"updated_date":"2026-01-30T23:21:52.101496","created_date":"2026-01-30T00:00:00"}
