{"id":"https://openalex.org/W4392384314","doi":"https://doi.org/10.1145/3616855.3635847","title":"Towards Better Chinese Spelling Check for Search Engines: A New Dataset and Strong Baseline","display_name":"Towards Better Chinese Spelling Check for Search Engines: A New Dataset and Strong Baseline","publication_year":2024,"publication_date":"2024-03-04","ids":{"openalex":"https://openalex.org/W4392384314","doi":"https://doi.org/10.1145/3616855.3635847"},"language":"en","primary_location":{"id":"doi:10.1145/3616855.3635847","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635847","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100417892","display_name":"Yue Wang","orcid":"https://orcid.org/0009-0004-7050-9811"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yue Wang","raw_affiliation_strings":["Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018620132","display_name":"Zilong Zheng","orcid":"https://orcid.org/0009-0004-4381-1599"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zilong Zheng","raw_affiliation_strings":["Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056462268","display_name":"Ziniu Tang","orcid":"https://orcid.org/0009-0000-0075-8282"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zecheng Tang","raw_affiliation_strings":["Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100657514","display_name":"Juntao Li","orcid":"https://orcid.org/0000-0002-6286-7529"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juntao Li","raw_affiliation_strings":["Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047575195","display_name":"Zhihui Liu","orcid":"https://orcid.org/0009-0006-5119-5196"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhihui Liu","raw_affiliation_strings":["Ant Group, Beijing, China","Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071459757","display_name":"Kunlong Chen","orcid":"https://orcid.org/0009-0001-0961-1479"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kunlong Chen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069625794","display_name":"Jinxiong Chang","orcid":"https://orcid.org/0009-0006-1712-4263"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinxiong Chang","raw_affiliation_strings":["Ant Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005163911","display_name":"Qishen Zhang","orcid":"https://orcid.org/0000-0001-9964-6298"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qishen Zhang","raw_affiliation_strings":["Ant Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023862460","display_name":"Zhongyi Liu","orcid":"https://orcid.org/0000-0001-9478-8107"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongyi Liu","raw_affiliation_strings":["Ant Group, Beijing, China","Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100402911","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-3895-5510"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100417892"],"corresponding_institution_ids":["https://openalex.org/I3923682"],"apc_list":null,"apc_paid":null,"fwci":1.022,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78583486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"769","last_page":"778"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.8678717613220215},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7860432863235474},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7685196399688721},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.7210289835929871},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.647072434425354},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6239136457443237},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.617141604423523},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5848168134689331},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3937719762325287},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3615970015525818},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33656802773475647},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11761635541915894},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07766932249069214}],"concepts":[{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.8678717613220215},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7860432863235474},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7685196399688721},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.7210289835929871},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.647072434425354},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6239136457443237},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.617141604423523},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5848168134689331},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3937719762325287},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3615970015525818},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33656802773475647},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11761635541915894},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07766932249069214},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3616855.3635847","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635847","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G2027526133","display_name":null,"funder_award_id":"BK20220488","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3910829908","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6311684539","display_name":null,"funder_award_id":"202204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6441875606","display_name":null,"funder_award_id":"62206194","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6558958160","display_name":"Collaborative Research:  A gravitational inventory of the solar system using high precision minor planet astrometry","funder_award_id":"2206194","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6600089284","display_name":"Collaborative Research:  AWARE -- International Linkages of Center for Intelligent Maintenance Systems on Web-Enabled and Tether-free Technologies","funder_award_id":"0220488","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G77284295","display_name":null,"funder_award_id":"2022048","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321605","display_name":"Government of Jiangsu Province","ror":"https://ror.org/004svx814"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1494632860","https://openalex.org/W2016871293","https://openalex.org/W2034038326","https://openalex.org/W2250323792","https://openalex.org/W2251568283","https://openalex.org/W2750779823","https://openalex.org/W2892311186","https://openalex.org/W2970521905","https://openalex.org/W2979826702","https://openalex.org/W2981852735","https://openalex.org/W2983735660","https://openalex.org/W3034797320","https://openalex.org/W3034999214","https://openalex.org/W3035309733","https://openalex.org/W3037162118","https://openalex.org/W3100315910","https://openalex.org/W3173175058","https://openalex.org/W3173375787","https://openalex.org/W3173712076","https://openalex.org/W3173859131","https://openalex.org/W3174595604","https://openalex.org/W3176140329","https://openalex.org/W3176484937","https://openalex.org/W3212050905","https://openalex.org/W4221165114","https://openalex.org/W4285233025","https://openalex.org/W4285293661","https://openalex.org/W4287888296","https://openalex.org/W4288089799","https://openalex.org/W4306317008","https://openalex.org/W4306317028"],"related_works":["https://openalex.org/W2161008081","https://openalex.org/W2100947578","https://openalex.org/W1555832326","https://openalex.org/W4298186509","https://openalex.org/W2556702969","https://openalex.org/W217221262","https://openalex.org/W611030372","https://openalex.org/W1974418053","https://openalex.org/W2081317458","https://openalex.org/W2021532426"],"abstract_inverted_index":{"Misspellings":[0],"in":[1,143],"search":[2,7,16,45,70,80,122],"engine":[3,81],"queries":[4],"may":[5],"prevent":[6],"engines":[8],"from":[9,59,78,115,244],"returning":[10],"accurate":[11],"results.":[12],"For":[13],"Chinese":[14,47,57,110,120],"mobile":[15,121],"engines,":[17,46],"due":[18,187],"to":[19,52,68,155,184,188,204,217],"the":[20,63,69,91,101,108,116,138,144,157,167,189,206,237,253],"different":[21],"input":[22,28],"methods":[23,175],"(e.g.,":[24],"hand-written":[25],"and":[26,54,89,131,161,165,176,239,258,272],"T9":[27],"methods),":[29],"more":[30,38,230,263],"types":[31],"of":[32,44,66,103,119,126,141,146,169],"misspellings":[33],"exist,":[34],"making":[35],"this":[36,85],"problem":[37],"challenging.":[39],"As":[40],"an":[41],"essential":[42],"module":[43],"Spelling":[48,111],"Check~(CSC)":[49],"models":[50,234],"aim":[51],"detect":[53],"correct":[55],"misspelled":[56],"characters":[58],"user-issued":[60],"queries.":[61,82],"Despite":[62],"great":[64],"value":[65],"CSC":[67,75,213],"engine,":[71],"there":[72],"is":[73,107,200,241],"no":[74],"benchmark":[76],"collected":[77,114,255],"real-world":[79,117,250,256],"To":[83,100,136],"fill":[84],"blank,":[86],"we":[87,150,209,261],"construct":[88,218],"release":[90],"Alipay":[92],"Search":[93],"Engine":[94],"Query":[95],"(AlipaySEQ)":[96],"spelling":[97],"check":[98],"dataset.":[99,251],"best":[102],"our":[104,226,249],"knowledge,":[105],"AlipaySEQ":[106,142,160,238],"first":[109],"Check":[112],"dataset":[113,257],"scenario":[118],"engines.":[123],"It":[124],"consists":[125],"15,522":[127],"high-quality":[128],"human":[129],"annotated":[130],"1,175,151":[132],"automatically":[133],"generated":[134],"samples.":[135],"demonstrate":[137,224],"unique":[139],"challenges":[140],"era":[145],"Large":[147],"Language":[148],"Models~(LLMs),":[149],"conduct":[151],"a":[152,211,219,270],"thorough":[153],"study":[154],"analyze":[156],"difference":[158],"between":[159],"existing":[162,173,233],"SIGHAN":[163],"benchmarks":[164],"compare":[166],"performance":[168,196,247],"various":[170],"baselines,":[171],"including":[172],"task-specific":[174],"LLMs.":[177],"We":[178],"observe":[179],"that":[180,225],"all":[181],"baselines":[182],"fail":[183],"perform":[185],"satisfactorily":[186],"over-correction":[190,207],"problem.":[191],"Especially,":[192],"LLMs":[193],"exhibit":[194],"below-par":[195],"on":[197,235,248,268],"AlipaySEQ,":[198],"which":[199],"rather":[201],"surprising.":[202],"Therefore,":[203],"alleviate":[205],"problem,":[208],"introduce":[210],"model-agnostic":[212],"Self-Refine":[214],"Framework":[215],"(SRF)":[216],"strong":[220,259],"baseline.":[221],"Comprehensive":[222],"experiments":[223],"proposed":[227],"SRF,":[228],"though":[229],"effective":[231],"against":[232],"both":[236],"SIGHAN15,":[240],"still":[242],"far":[243],"achieving":[245],"satisfactory":[246],"With":[252],"newly":[254],"baseline,":[260],"hope":[262],"progress":[264],"can":[265],"be":[266],"achieved":[267],"such":[269],"challenging":[271],"valuable":[273],"task.":[274]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
