{"id":"https://openalex.org/W7141176254","doi":"https://doi.org/10.1007/s11390-026-5948-8","title":"Data Preparation for Large Language Models","display_name":"Data Preparation for Large Language Models","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7141176254","doi":"https://doi.org/10.1007/s11390-026-5948-8"},"language":"en","primary_location":{"id":"doi:10.1007/s11390-026-5948-8","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11390-026-5948-8","pdf_url":null,"source":{"id":"https://openalex.org/S161516442","display_name":"Journal of Computer Science and Technology","issn_l":"1000-9000","issn":["1000-9000","1860-4749"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130771207","display_name":"Hao Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Liang","raw_affiliation_strings":["Beijing Zhongguancun Academy, Beijing, China","Center for Data Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Zhongguancun Academy, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Center for Data Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130806134","display_name":"Zhen Hao Wong","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Hao Wong","raw_affiliation_strings":["School of Mathematical Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057222304","display_name":"Ruitong Liu","orcid":"https://orcid.org/0000-0003-2361-2993"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui-Tong Liu","raw_affiliation_strings":["School of Mathematical Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130780362","display_name":"Yu-Han Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu-Han Wang","raw_affiliation_strings":["School of Mathematical Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130817755","display_name":"Mei-Yi Qiang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mei-Yi Qiang","raw_affiliation_strings":["School of Software and Microelectronics, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Software and Microelectronics, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130734437","display_name":"Zheng-Yang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng-Yang Zhao","raw_affiliation_strings":["Center for Data Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Data Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130728090","display_name":"Cheng-Yu Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng-Yu Shen","raw_affiliation_strings":["School of Software and Microelectronics, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Software and Microelectronics, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126772346","display_name":"Cong-hui He","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cong-Hui He","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130732204","display_name":"Wen-Tao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wen-Tao Zhang","raw_affiliation_strings":["Beijing Zhongguancun Academy, Beijing, China","Center for Data Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Zhongguancun Academy, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Center for Data Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130756801","display_name":"Bin Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bin Cui","raw_affiliation_strings":["Beijing Key Laboratory of Software and Hardware Cooperative Artificial Intelligence Systems, Beijing, China","School of Computer Science, Peking University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Software and Hardware Cooperative Artificial Intelligence Systems, Beijing, China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5130732204","https://openalex.org/A5130756801"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I4210100255"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41742538,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"41","issue":"1","first_page":"289","last_page":"317"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.10109999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.10109999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.05770000070333481,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.051500000059604645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7562000155448914},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6284999847412109},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5927000045776367},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.37779998779296875},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.33410000801086426},{"id":"https://openalex.org/keywords/theory-of-computation","display_name":"Theory of computation","score":0.3089999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8528000116348267},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7562000155448914},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6337000131607056},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6284999847412109},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5927000045776367},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3546999990940094},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C24858836","wikidata":"https://www.wikidata.org/wiki/Q844718","display_name":"Theory of computation","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27649998664855957},{"id":"https://openalex.org/C2778464652","wikidata":"https://www.wikidata.org/wiki/Q309849","display_name":"Open research","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C100463513","wikidata":"https://www.wikidata.org/wiki/Q5227322","display_name":"Data model (GIS)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11390-026-5948-8","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11390-026-5948-8","pdf_url":null,"source":{"id":"https://openalex.org/S161516442","display_name":"Journal of Computer Science and Technology","issn_l":"1000-9000","issn":["1000-9000","1860-4749"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer Science and Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W1981773323","https://openalex.org/W2100960835","https://openalex.org/W2120101509","https://openalex.org/W2132069633","https://openalex.org/W2145349611","https://openalex.org/W2152565070","https://openalex.org/W2158874082","https://openalex.org/W2606974598","https://openalex.org/W2888482885","https://openalex.org/W2912924812","https://openalex.org/W2923014074","https://openalex.org/W2962833164","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W2970641574","https://openalex.org/W2971015127","https://openalex.org/W3100355250","https://openalex.org/W3103639864","https://openalex.org/W3128232076","https://openalex.org/W3159959439","https://openalex.org/W3169483174","https://openalex.org/W3172340245","https://openalex.org/W3176477796","https://openalex.org/W3177057043","https://openalex.org/W3177468621","https://openalex.org/W3177765786","https://openalex.org/W3196731672","https://openalex.org/W3206487987","https://openalex.org/W4206637810","https://openalex.org/W4284691825","https://openalex.org/W4285178342","https://openalex.org/W4382318960","https://openalex.org/W4385564993","https://openalex.org/W4385570425","https://openalex.org/W4385570522","https://openalex.org/W4385572634","https://openalex.org/W4385572901","https://openalex.org/W4385734162","https://openalex.org/W4389519099","https://openalex.org/W4389524372","https://openalex.org/W4389524493","https://openalex.org/W4394782456","https://openalex.org/W4398221084","https://openalex.org/W4398234485","https://openalex.org/W4401042824","https://openalex.org/W4401042981","https://openalex.org/W4401043863","https://openalex.org/W4402351990","https://openalex.org/W4402671026","https://openalex.org/W4402671286","https://openalex.org/W4402671353","https://openalex.org/W4402671569","https://openalex.org/W4402671749","https://openalex.org/W4402671765","https://openalex.org/W4402683765","https://openalex.org/W4402683794","https://openalex.org/W4403421522","https://openalex.org/W4404445846","https://openalex.org/W4404781171","https://openalex.org/W4404782091","https://openalex.org/W4404782113","https://openalex.org/W4404782196","https://openalex.org/W4404783895","https://openalex.org/W4406085118","https://openalex.org/W4406157726","https://openalex.org/W4411119301","https://openalex.org/W4411119444","https://openalex.org/W4411120413","https://openalex.org/W4411624718","https://openalex.org/W4412886635","https://openalex.org/W4412888066","https://openalex.org/W4412888088","https://openalex.org/W4412888341","https://openalex.org/W4412888419","https://openalex.org/W4412889571","https://openalex.org/W4412944787","https://openalex.org/W4412945435","https://openalex.org/W4412945683","https://openalex.org/W4414359227","https://openalex.org/W4415725257","https://openalex.org/W4415933221","https://openalex.org/W4416036086","https://openalex.org/W7120088840","https://openalex.org/W7126027675"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2026-03-28T00:00:00"}
