{"id":"https://openalex.org/W7161110139","doi":"https://doi.org/10.48550/arxiv.2605.13149","title":"AcquisitionSynthesis: Targeted Data Generation using Acquisition Functions","display_name":"AcquisitionSynthesis: Targeted Data Generation using Acquisition Functions","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161110139","doi":"https://doi.org/10.48550/arxiv.2605.13149"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13149","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13149","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099230662","display_name":"Ishika Agarwal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Ishika","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128636411","display_name":"Sofia Stoica","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stoica, Sofia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038735910","display_name":"Emre Can Acikgoz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Acikgoz, Emre Can","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136109929","display_name":"Pradeep Natarajan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Natarajan, Pradeep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026771837","display_name":"Mahdi Namazifar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Namazifar, Mahdi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136120064","display_name":"Jiaqi Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136129518","display_name":"Dilek Hakkani-T\u00fcr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hakkani-T\u00fcr, Dilek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.5401999950408936,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.5401999950408936,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1551000028848648,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.05119999870657921,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-acquisition","display_name":"Data acquisition","score":0.6298999786376953},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6226000189781189},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5557000041007996},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4918999969959259},{"id":"https://openalex.org/keywords/knowledge-acquisition","display_name":"Knowledge acquisition","score":0.47760000824928284},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4562999904155731},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4417000114917755},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.43459999561309814}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7786999940872192},{"id":"https://openalex.org/C163985040","wikidata":"https://www.wikidata.org/wiki/Q1172399","display_name":"Data acquisition","level":2,"score":0.6298999786376953},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6226000189781189},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5557000041007996},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5447999835014343},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5246000289916992},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4918999969959259},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.47760000824928284},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4562999904155731},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4417000114917755},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.43459999561309814},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.3831000030040741},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3695000112056732},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34689998626708984},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C132758656","wikidata":"https://www.wikidata.org/wiki/Q5307365","display_name":"Dreyfus model of skill acquisition","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13149","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13149","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4615028500556946,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"quality":[1,19],"remains":[2],"a":[3,52,194],"critical":[4],"bottleneck":[5],"in":[6,90],"developing":[7],"capable,":[8],"competitive":[9],"models.":[10],"Researchers":[11],"have":[12,63],"explored":[13],"many":[14],"ways":[15],"to":[16,45,57,72,121,125,167,192],"generate":[17,126,175],"top":[18],"samples.":[20,36],"Some":[21],"works":[22,38,62],"rely":[23,39],"on":[24,40,80,133,158],"rejection":[25],"sampling:":[26],"generating":[27],"lots":[28],"of":[29,55,76,93,103,137],"synthetic":[30,128],"samples":[31,79],"and":[32,141,163,170,180],"filtering":[33],"out":[34],"low-quality":[35],"Other":[37],"larger":[41],"or":[42,51],"closed-source":[43],"models":[44,120,124,150,173,179],"extract":[46],"model":[47],"weaknesses,":[48],"necessary":[49],"skills,":[50],"curriculum":[53],"off":[54],"which":[56],"base":[58],"data":[59,154,176],"generation.":[60],"These":[61],"one":[64],"common":[65],"limitation:":[66],"there":[67],"is":[68,164],"no":[69],"quantitative":[70],"approach":[71],"measure":[73,98],"the":[74,77,81,91,99],"impact":[75],"generated":[78],"downstream":[82],"learner.":[83],"Active":[84],"learning":[85],"literature":[86],"provides":[87],"exactly":[88],"this,":[89,111],"form":[92],"acquisition":[94,116,188],"functions.":[95],"Acquisition":[96],"functions":[97,117],"informativeness":[100],"and/or":[101],"influence":[102],"data,":[104],"providing":[105],"interpretable,":[106],"model-centric":[107],"signals.":[108],"Inspired":[109],"by":[110],"we":[112,190],"propose":[113],"AcquisitionSynthesis:":[114],"using":[115],"as":[118],"reward":[119],"train":[122],"language":[123],"higher-quality":[127],"data.":[129],"We":[130],"conduct":[131],"experiments":[132],"classic":[134],"verifiable":[135],"tasks":[136,160],"math,":[138],"medical":[139],"question-answering,":[140],"coding.":[142],"Our":[143],"experimental":[144],"results":[145],"indicate":[146],"that":[147,200],"(1)":[148],"student":[149],"trained":[151],"with":[152],"AcquisitionSynthesis":[153,172],"achieve":[155],"good":[156],"performance":[157],"in-distribution":[159],"(2-7%":[161],"gain)":[162],"more":[165],"robust":[166],"catastrophic":[168],"forgetting,":[169],"(2)":[171],"can":[174],"for":[177,181],"other":[178],"low-to-high":[182],"resource":[183],"training":[184],"paradigms.":[185],"By":[186],"leveraging":[187],"rewards,":[189],"seek":[191],"demonstrate":[193],"principled":[195],"path":[196],"toward":[197],"model-aware":[198],"self-improvement":[199],"surpasses":[201],"static":[202],"datasets.":[203]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-15T00:00:00"}
