{"id":"https://openalex.org/W7133312993","doi":"https://doi.org/10.1109/tetci.2026.3655163","title":"Homophone-Based Chinese Natural Language Data Augmentation","display_name":"Homophone-Based Chinese Natural Language Data Augmentation","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133312993","doi":"https://doi.org/10.1109/tetci.2026.3655163"},"language":null,"primary_location":{"id":"doi:10.1109/tetci.2026.3655163","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2026.3655163","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jia Wei","orcid":"https://orcid.org/0000-0002-2234-0378"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Wei","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2234-0378","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062123451","display_name":"Xingjun Zhang","orcid":"https://orcid.org/0000-0003-1434-7016"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingjun Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"],"raw_orcid":"https://orcid.org/0000-0003-1434-7016","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128003409","display_name":"Witold Pedrycz","orcid":null},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Witold Pedrycz","raw_affiliation_strings":["Department of Measurement and Control Systems, Silesian University of Technology (SUT), Gliwice, Poland"],"raw_orcid":"https://orcid.org/0000-0002-9335-9930","affiliations":[{"raw_affiliation_string":"Department of Measurement and Control Systems, Silesian University of Technology (SUT), Gliwice, Poland","institution_ids":["https://openalex.org/I119004910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127874354","display_name":"Longxiang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longxiang Wang","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"],"raw_orcid":"https://orcid.org/0000-0003-2005-114X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127891982","display_name":"Weiping Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I199305430","display_name":"Nantong University","ror":"https://ror.org/02afcvw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I199305430"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Ding","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China"],"raw_orcid":"https://orcid.org/0000-0002-3180-7347","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China","institution_ids":["https://openalex.org/I199305430"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5127911207","display_name":"Xuehai Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuehai Qian","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-3950-1264","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27521102,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"3","first_page":"2291","last_page":"2300"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6100999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6100999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.14329999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.02630000002682209,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/homophone","display_name":"Homophone","score":0.8561999797821045},{"id":"https://openalex.org/keywords/pinyin","display_name":"Pinyin","score":0.753600001335144},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5694000124931335},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5248000025749207},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.3991999924182892},{"id":"https://openalex.org/keywords/chinese-language","display_name":"Chinese language","score":0.3865000009536743},{"id":"https://openalex.org/keywords/meaning","display_name":"Meaning (existential)","score":0.37630000710487366},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.36329999566078186}],"concepts":[{"id":"https://openalex.org/C160253069","wikidata":"https://www.wikidata.org/wiki/Q221079","display_name":"Homophone","level":2,"score":0.8561999797821045},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8209999799728394},{"id":"https://openalex.org/C2781095461","wikidata":"https://www.wikidata.org/wiki/Q42222","display_name":"Pinyin","level":3,"score":0.753600001335144},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6639999747276306},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5694000124931335},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5248000025749207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5123999714851379},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3991999924182892},{"id":"https://openalex.org/C3018428822","wikidata":"https://www.wikidata.org/wiki/Q7850","display_name":"Chinese language","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.36329999566078186},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3319999873638153},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.3199999928474426},{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C67463725","wikidata":"https://www.wikidata.org/wiki/Q17081452","display_name":"Natural language programming","level":5,"score":0.2994999885559082},{"id":"https://openalex.org/C2777019801","wikidata":"https://www.wikidata.org/wiki/Q5100702","display_name":"Chinese as a foreign language","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetci.2026.3655163","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2026.3655163","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5172787308692932,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G349678912","display_name":null,"funder_award_id":"62372366","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4431121220","display_name":null,"funder_award_id":"62532005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data,":[0],"devices,":[1],"and":[2,9,20,50,65,77,93,106,126,140,187,211,228],"algorithms":[3],"drive":[4],"progress":[5],"in":[6,85,161],"artificial":[7],"intelligence,":[8],"high":[10],"quality":[11],"data":[12,23,32,124,183],"is":[13,24,226],"essential":[14],"for":[15,120,157,191,197],"model":[16,53],"training.":[17],"However,":[18,95],"collecting":[19],"labeling":[21],"such":[22,89],"costly.":[25],"Inspired":[26],"by":[27,219,231],"computer":[28],"vision,":[29],"natural":[30,122],"language":[31,52,107,123,193],"augmentation":[33,58,125,176,184,195],"has":[34],"attracted":[35],"attention":[36],"as":[37,90],"a":[38,128,134,137,148,151,170,189],"way":[39],"to":[40,67,221,240],"expand":[41],"corpora":[42],"through":[43],"random":[44],"insertion,":[45],"deletion,":[46],"synonym":[47],"replacement,":[48],"swapping,":[49],"large":[51],"based":[54,153],"generation.":[55],"Most":[56],"Chinese":[57,69,79,117,121,130,175,178,182,203],"methods":[59,196],"still":[60,98],"follow":[61],"English":[62],"style":[63],"strategies":[64],"fail":[66],"exploit":[68,116],"specific":[70,194],"linguistic":[71],"properties,":[72],"which":[73],"limits":[74],"their":[75],"usability":[76],"effectiveness.":[78],"users":[80],"often":[81],"make":[82],"homophone":[83,113,118],"errors":[84],"instant":[86],"messaging":[87],"programs":[88],"QQ,":[91],"WeChat,":[92],"WhatsApp.":[94],"readers":[96],"can":[97],"understand":[99],"the":[100,143,162,181,224,242],"intended":[101],"meaning":[102],"from":[103],"context,":[104],"background,":[105],"habits.":[108],"We":[109,115,200,235],"call":[110],"this":[111],"phenomenon":[112],"consistency.":[114],"consistency":[119],"design":[127],"Homophone-based":[129],"Augmentation.":[131],"HCA":[132,173,215],"converts":[133,142],"sentence":[135,149,164],"into":[136,147],"pinyin":[138,144,158],"queue":[139,145],"then":[141],"back":[146],"using":[150,177],"dictionary":[152],"dynamic":[154],"programming":[155],"algorithm":[156],"identification.":[159],"Words":[160],"original":[163],"are":[165],"replaced":[166],"with":[167,169],"homophones":[168],"specified":[171],"probability.":[172],"broadens":[174],"features,":[179],"enriches":[180],"strategy":[185],"space,":[186],"provides":[188],"reference":[190],"designing":[192],"other":[198],"languages.":[199],"fine":[201],"tune":[202],"BERT":[204],"on":[205,233,247],"four":[206],"datasets":[207],"of":[208,244],"different":[209],"sizes":[210],"evaluate":[212],"test":[213,217],"accuracy.":[214],"improves":[216,229],"accuracy":[218],"up":[220],"37.62%":[222],"when":[223],"dataset":[225],"small":[227],"it":[230],"3.42%":[232],"average.":[234],"also":[236],"conduct":[237],"ablation":[238],"studies":[239],"analyze":[241],"effects":[243],"key":[245],"parameters":[246],"training":[248],"outcomes.":[249]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
