{"id":"https://openalex.org/W4416034790","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.693","title":"DPF-CM: A Data Processing Framework with Privacy-Preserving Vector Databases for Chinese Medical LLMs Training and Deployment","display_name":"DPF-CM: A Data Processing Framework with Privacy-Preserving Vector Databases for Chinese Medical LLMs Training and Deployment","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416034790","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.693"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.693","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.693","pdf_url":"https://aclanthology.org/2025.findings-emnlp.693.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-emnlp.693.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100352894","display_name":"Wei Huang","orcid":"https://orcid.org/0009-0004-9458-7769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Huang","raw_affiliation_strings":["Ant Group , China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group , China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038610148","display_name":"Anda Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anda Cheng","raw_affiliation_strings":["Ant Group , China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group , China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100423006","display_name":"Zhao Zhang","orcid":"https://orcid.org/0000-0002-0862-1093"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao Zhang","raw_affiliation_strings":["Ant Group , China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group , China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028637336","display_name":"Yinggui Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yinggui Wang","raw_affiliation_strings":["Ant Group , China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group , China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15749758,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"12904","last_page":"12916"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.880299985408783,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.880299985408783,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.009200000204145908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5799999833106995},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5325000286102295},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.5006999969482422},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3222000002861023},{"id":"https://openalex.org/keywords/data-processing-system","display_name":"Data processing system","score":0.3181999921798706},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3073999881744385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6119999885559082},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5799999833106995},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5325000286102295},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5024999976158142},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.5006999969482422},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.35109999775886536},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33399999141693115},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C112118009","wikidata":"https://www.wikidata.org/wiki/Q4925872","display_name":"Data processing system","level":2,"score":0.3181999921798706},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.31349998712539673},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29319998621940613},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2881999909877777},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.26269999146461487},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2551000118255615},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.25040000677108765},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.693","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.693","pdf_url":"https://aclanthology.org/2025.findings-emnlp.693.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.693","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.693","pdf_url":"https://aclanthology.org/2025.findings-emnlp.693.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416034790.pdf","grobid_xml":"https://content.openalex.org/works/W4416034790.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"open-source":[1,177],"training":[2,12,28,46,124,182],"pipelines":[3],"for":[4,42,61,94],"Chinese":[5,43,169],"medical":[6,170],"language":[7,20,189],"models":[8,21,102,190],"predominantly":[9],"emphasize":[10],"optimizing":[11],"methodologies":[13],"to":[14,76,80,103,151,172],"enhance":[15],"the":[16,82,120,179],"performance":[17,175],"of":[18,84,123,199,204],"large":[19,188],"(LLMs),":[22],"yet":[23],"lack":[24,83],"comprehensive":[25],"exploration":[26],"into":[27],"data":[29,57,65,96,183],"processing.To":[30],"address":[31],"this":[32],"gap,":[33],"we":[34,68,126],"propose":[35,127],"DPF-CM,":[36],"a":[37,56,71,128,201],"holistic":[38],"Data":[39],"Processing":[40],"Framework":[41],"Medical":[44],"LLMs":[45],"and":[47,87,146,196],"deployment.DPF-CM":[48],"comprises":[49],"two":[50],"core":[51],"modules.The":[52],"first":[53],"module":[54,108],"is":[55],"processing":[58,66],"pipeline":[59],"tailored":[60],"model":[62,114,137,164],"training.Beyond":[63],"standard":[64],"operations,":[67],"(1)":[69],"introduce":[70],"chained":[72],"examples":[73],"context-learning":[74],"strategy":[75],"generate":[77],"question-oriented":[78],"instructions":[79],"mitigate":[81],"instruction":[85],"content,":[86],"(2)":[88],"implement":[89],"an":[90],"ensemble-based":[91],"filtering":[92],"mechanism":[93],"preference":[95],"curation":[97],"that":[98,160],"averages":[99],"multiple":[100],"reward":[101],"suppress":[104],"noisy":[105],"samples.The":[106],"second":[107],"focuses":[109],"on":[110],"privacy":[111,117,153,184],"preservation":[112],"during":[113,155],"deployment.To":[115],"prevent":[116],"risks":[118],"from":[119],"inadvertent":[121],"exposure":[122],"data,":[125],"Privacy":[129],"Preserving":[130],"Vector":[131],"Database":[132],"(PPVD)":[133],"approach,":[134],"which":[135],"involves":[136],"memory":[138],"search,":[139],"highrisk":[140],"database":[141,144],"construction,":[142,145],"secure":[143],"match-and-replace,":[147],"four":[148],"key":[149],"stages":[150],"minimize":[152],"leakage":[154,185],"inference":[156],"collectively.Experimental":[157],"results":[158],"show":[159],"DPF-CM":[161],"significantly":[162],"improves":[163],"accuracy,":[165],"enabling":[166],"our":[167],"trained":[168],"LLM":[171],"achieve":[173],"state-of-the-art":[174],"among":[176],"counterparts.Moreover,":[178],"framework":[180],"reduces":[181],"by":[186],"27%.Recent":[187],"(LLMs)":[191],"have":[192],"achieved":[193],"remarkable":[194],"breakthroughs":[195],"are":[197],"capable":[198],"answering":[200],"wide":[202],"range":[203],"questions":[205],"(Achiam":[206],"et":[207,210],"al.,":[208,211],"2023;Wang":[209],"2023).":[212]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
