{"id":"https://openalex.org/W4307923067","doi":"https://doi.org/10.1145/3570209","title":"Chinese Grammatical Error Correction Using Pre-trained Models and Pseudo Data","display_name":"Chinese Grammatical Error Correction Using Pre-trained Models and Pseudo Data","publication_year":2022,"publication_date":"2022-11-02","ids":{"openalex":"https://openalex.org/W4307923067","doi":"https://doi.org/10.1145/3570209"},"language":"en","primary_location":{"id":"doi:10.1145/3570209","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3570209","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3570209","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3570209","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100449113","display_name":"Hongfei Wang","orcid":"https://orcid.org/0000-0002-7169-0796"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hongfei Wang","raw_affiliation_strings":["Tokyo Metropolitan University, Hinoshi, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-7169-0796","affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Hinoshi, Tokyo, Japan","institution_ids":["https://openalex.org/I69740276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090963032","display_name":"Michiki Kurosawa","orcid":"https://orcid.org/0000-0001-7452-9861"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Michiki Kurosawa","raw_affiliation_strings":["Tokyo Metropolitan University, Hinoshi, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0001-7452-9861","affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Hinoshi, Tokyo, Japan","institution_ids":["https://openalex.org/I69740276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010294982","display_name":"Satoru Katsumata","orcid":"https://orcid.org/0000-0002-9028-3598"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoru Katsumata","raw_affiliation_strings":["Tokyo Metropolitan University, Hinoshi, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-9028-3598","affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Hinoshi, Tokyo, Japan","institution_ids":["https://openalex.org/I69740276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003760956","display_name":"Masato Mita","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089607","display_name":"CyberAgent (Japan)","ror":"https://ror.org/0060jg679","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210089607"]},{"id":"https://openalex.org/I4210136457","display_name":"Shibuya (Japan)","ror":"https://ror.org/03t1ztz45","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210136457"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masato Mita","raw_affiliation_strings":["CyberAgent, Inc., Shibuya-ku, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0001-6210-3716","affiliations":[{"raw_affiliation_string":"CyberAgent, Inc., Shibuya-ku, Tokyo, Japan","institution_ids":["https://openalex.org/I4210089607","https://openalex.org/I4210136457"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061931124","display_name":"Mamoru Komachi","orcid":"https://orcid.org/0000-0003-1166-1739"},"institutions":[{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mamoru Komachi","raw_affiliation_strings":["Tokyo Metropolitan University, Hinoshi, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1166-1739","affiliations":[{"raw_affiliation_string":"Tokyo Metropolitan University, Hinoshi, Tokyo, Japan","institution_ids":["https://openalex.org/I69740276"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100449113"],"corresponding_institution_ids":["https://openalex.org/I69740276"],"apc_list":null,"apc_paid":null,"fwci":0.8324,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.78063409,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"22","issue":"3","first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8078019618988037},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7255278825759888},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5953845977783203},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5685592293739319},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5676451921463013},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5596341490745544},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.42688706517219543},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.350546658039093},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08649948239326477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8078019618988037},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7255278825759888},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5953845977783203},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5685592293739319},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5676451921463013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5596341490745544},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.42688706517219543},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.350546658039093},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08649948239326477},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3570209","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3570209","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3570209","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3570209","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3570209","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3570209","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7799999713897705}],"awards":[{"id":"https://openalex.org/G8894170145","display_name":null,"funder_award_id":"JPMJFS2139","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4307923067.pdf","grobid_xml":"https://content.openalex.org/works/W4307923067.grobid-xml"},"referenced_works_count":7,"referenced_works":["https://openalex.org/W2965373594","https://openalex.org/W2972834553","https://openalex.org/W2981852735","https://openalex.org/W3169113923","https://openalex.org/W3199824684","https://openalex.org/W4226494438","https://openalex.org/W4288089799"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W4205302943","https://openalex.org/W2119949815","https://openalex.org/W2561132942","https://openalex.org/W2142795561","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2379948177"],"abstract_inverted_index":{"In":[0],"recent":[1],"studies,":[2],"pre-trained":[3,32,51,175,226],"models":[4,33,47,64,96,100,129,147,176,227],"and":[5,34,57,60,83,140,153,164,177,185,190,216,228],"pseudo":[6,35,66,134,151,178,229],"data":[7,36,67,179],"have":[8,27],"been":[9],"key":[10],"factors":[11],"in":[12,37,160],"improving":[13],"the":[14,17,29,38,70,74,79,98,103,107,157,171,174,181,199,203,206],"performance":[15],"of":[16,31,106,116,133,162,173,202],"English":[18],"grammatical":[19],"error":[20,200,214],"correction":[21],"(GEC)":[22],"task.":[23,41,77,109],"However,":[24],"few":[25],"studies":[26],"examined":[28],"role":[30],"Chinese":[39,45,53,55,58,75,84,110,127,182],"GEC":[40,46,76,88,128,183],"Therefore,":[42],"we":[43,197],"develop":[44],"based":[48],"on":[49,180],"three":[50,131],"models:":[52],"BERT,":[54],"T5,":[56],"BART,":[59],"then":[61,124],"incorporate":[62],"these":[63],"with":[65,130],"to":[68],"determine":[69],"best":[71],"configuration":[72],"for":[73,193],"On":[78],"natural":[80],"language":[81],"processing":[82],"computing":[85],"(NLPCC)":[86],"2018":[87],"shared":[89,108],"task":[90,184],"test":[91],"set,":[92],"all":[93,213],"our":[94,126],"single":[95],"outperform":[97],"ensemble":[99],"developed":[101],"by":[102],"top":[104],"team":[105],"BART":[111],"achieves":[112],"an":[113,187],"F":[114],"score":[115],"37.15,":[117],"which":[118],"is":[119,156],"a":[120],"state-of-the-art":[121],"result.":[122],"We":[123,143],"combine":[125],"kinds":[132],"data:":[135],"Lang-8":[136],"(MaskGEC),":[137,139],"Wiki":[138,141],"(Backtranslation).":[142],"find":[144],"that":[145,209],"most":[146],"can":[148],"benefit":[149],"from":[150],"data,":[152],"BART+Lang-8":[154],"(MaskGEC)":[155],"ideal":[158],"setting":[159],"terms":[161],"accuracy":[163],"training":[165],"efficiency.":[166],"The":[167],"experimental":[168],"results":[169,207],"demonstrate":[170],"effectiveness":[172],"provide":[186],"easily":[188],"reproducible":[189],"adaptable":[191],"baseline":[192],"future":[194],"works.":[195],"Finally,":[196],"annotate":[198],"types":[201],"development":[204],"data;":[205],"show":[208],"word-level":[210],"errors":[211,219],"dominate":[212],"types,":[215],"word":[217],"selection":[218],"must":[220],"be":[221],"addressed":[222],"even":[223],"when":[224],"using":[225],"data.":[230],"Our":[231],"codes":[232],"are":[233],"available":[234],"at":[235],"https://github.com/wang136906578/BERT-encoder-ChineseGEC":[236],".":[237]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
