{"id":"https://openalex.org/W4410636854","doi":"https://doi.org/10.1145/3701716.3715245","title":"Few-shot LLM Synthetic Data with Distribution Matching","display_name":"Few-shot LLM Synthetic Data with Distribution Matching","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410636854","doi":"https://doi.org/10.1145/3701716.3715245"},"language":"en","primary_location":{"id":"doi:10.1145/3701716.3715245","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715245","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715245","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715245","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001801047","display_name":"Jiyuan Ren","orcid":"https://orcid.org/0009-0005-4698-6682"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiyuan Ren","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040755010","display_name":"Zhaocheng Du","orcid":"https://orcid.org/0000-0002-1811-129X"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaocheng Du","raw_affiliation_strings":["Huawei Noah's Ark Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088110992","display_name":"Zhihao Wen","orcid":"https://orcid.org/0000-0002-7688-5381"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhihao Wen","raw_affiliation_strings":["Huawei Noah's Ark Lab, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032579783","display_name":"Qinglin Jia","orcid":"https://orcid.org/0000-0002-3583-6719"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinglin Jia","raw_affiliation_strings":["Huawei Noah's Ark Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Beijing, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075518954","display_name":"Sunhao Dai","orcid":"https://orcid.org/0009-0002-7549-0860"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sunhao Dai","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001967239","display_name":"Chuhan Wu","orcid":"https://orcid.org/0000-0001-5730-8792"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuhan Wu","raw_affiliation_strings":["Huawei Noah's Ark Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Beijing, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021124418","display_name":"Zhenhua Dong","orcid":"https://orcid.org/0000-0002-2231-4663"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Dong","raw_affiliation_strings":["Huawei Noah's Ark Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5001801047"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.7317,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.82621296,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"432","last_page":"441"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10399","display_name":"Hydrocarbon exploration and reservoir analysis","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10399","display_name":"Hydrocarbon exploration and reservoir analysis","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9437999725341797,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10271","display_name":"Seismic Imaging and Inversion Techniques","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/1908","display_name":"Geophysics"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.7226340770721436},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6375576257705688},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5991864800453186},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5397303104400635},{"id":"https://openalex.org/keywords/one-shot","display_name":"One shot","score":0.4882180988788605},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3327788710594177},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14884477853775024},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09094884991645813},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08410930633544922},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.06566742062568665}],"concepts":[{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.7226340770721436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6375576257705688},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5991864800453186},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5397303104400635},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.4882180988788605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3327788710594177},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14884477853775024},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09094884991645813},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08410930633544922},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.06566742062568665},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3701716.3715245","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715245","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715245","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3701716.3715245","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715245","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715245","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410636854.pdf","grobid_xml":"https://content.openalex.org/works/W4410636854.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W128638292","https://openalex.org/W1503398984","https://openalex.org/W1859757340","https://openalex.org/W2152749438","https://openalex.org/W2212660284","https://openalex.org/W2251939518","https://openalex.org/W3021397474","https://openalex.org/W3153451655","https://openalex.org/W4292341621","https://openalex.org/W4319300193","https://openalex.org/W4368755500","https://openalex.org/W4384662964","https://openalex.org/W4384828482","https://openalex.org/W4387846179","https://openalex.org/W4400909835","https://openalex.org/W4401863288","https://openalex.org/W4401863388","https://openalex.org/W4403220139","https://openalex.org/W6600545252","https://openalex.org/W6600804061","https://openalex.org/W6602811707","https://openalex.org/W6605035644","https://openalex.org/W6614377781"],"related_works":["https://openalex.org/W2497720472","https://openalex.org/W4292659306","https://openalex.org/W3044321615","https://openalex.org/W2806221744","https://openalex.org/W2326937258","https://openalex.org/W394267150","https://openalex.org/W2773965352","https://openalex.org/W4294892107","https://openalex.org/W2357748469","https://openalex.org/W2392917037"],"abstract_inverted_index":{"As":[0,62],"large":[1],"language":[2,14,54],"models":[3,35],"(LLMs)":[4],"advance,":[5],"their":[6],"ability":[7],"to":[8,24,29,115,188,226],"perform":[9],"in-context":[10],"learning":[11],"and":[12,93,154],"few-shot":[13],"generation":[15,92],"has":[16,20],"improved":[17],"significantly.":[18],"This":[19,162],"spurred":[21],"using":[22],"LLMs":[23],"produce":[25],"high-quality":[26],"synthetic":[27,44,67,90,195],"data":[28,45,51,68,72,77,91,118,128,158,167],"enhance":[30],"the":[31,49,75,111,131,136,147,178,185,190,201],"performance":[32,81,213],"of":[33,135,152,193],"smaller":[34],"like":[36],"online":[37,219,224],"retrievers":[38],"or":[39],"weak":[40],"LLMs.":[41],"However,":[42],"LLM-generated":[43],"often":[46],"differs":[47],"from":[48,121],"real":[50,71,137,174,202],"in":[52,173],"key":[53,98],"attributes":[55,151,170],"(e.g.,":[56],"styles,":[57],"tones,":[58],"content":[59],"proportions,":[60],"etc.).":[61],"a":[63,89,140],"result,":[64],"mixing":[65],"these":[66],"directly":[69],"with":[70,168,200],"may":[73],"distort":[74],"original":[76],"distribution,":[78],"potentially":[79],"hindering":[80],"improvements.":[82,214],"To":[83],"solve":[84],"this,":[85],"we":[86],"introduce":[87],"SynAlign:":[88],"filtering":[94],"framework":[95],"based":[96,159],"on":[97,160,206,222],"attribute":[99,142],"distribution":[100,198],"matching.":[101],"Before":[102],"generation,":[103,177],"SynAlign":[104],"employs":[105],"an":[106,218,223],"uncertainty":[107],"tracker":[108],"surrogated":[109],"by":[110],"Gaussian":[112],"Process":[113],"model":[114],"iteratively":[116],"select":[117],"clusters":[119],"distinct":[120],"selected":[122],"ones":[123],"as":[124,184],"demonstrations":[125,153],"for":[126],"new":[127,157],"synthesis,":[129],"facilitating":[130],"efficient":[132],"exploration":[133],"diversity":[134],"data.":[138,175,203],"Then,":[139],"latent":[141],"reasoning":[143],"method":[144],"is":[145,182,232],"employed:":[146],"LLM":[148],"summarizes":[149],"linguistic":[150,169],"then":[155],"synthesizes":[156],"them.":[161],"approach":[163],"facilitates":[164],"synthesizing":[165],"diverse":[166],"that":[171],"appear":[172],"After":[176],"Maximum":[179],"Mean":[180],"Discrepancy":[181],"used":[183],"objective":[186],"function":[187],"learn":[189],"sampling":[191],"weight":[192],"each":[194],"data,":[196],"ensuring":[197],"matching":[199],"Our":[204,230],"experiments":[205],"multiple":[207],"text":[208],"prediction":[209],"tasks":[210],"show":[211],"significant":[212],"We":[215],"also":[216],"conducted":[217],"A/B":[220],"test":[221],"retriever":[225],"demonstrate":[227],"SynAlign's":[228],"effectiveness.":[229],"code":[231],"available":[233],"https://github.com/nighood/SynAlign":[234],"here.":[235]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
