{"id":"https://openalex.org/W7125523437","doi":"https://doi.org/10.48550/arxiv.2601.15417","title":"Ambient Dataloops: Generative Models for Dataset Refinement","display_name":"Ambient Dataloops: Generative Models for Dataset Refinement","publication_year":2026,"publication_date":"2026-01-21","ids":{"openalex":"https://openalex.org/W7125523437","doi":"https://doi.org/10.48550/arxiv.2601.15417"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.15417","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.15417","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010236653","display_name":"Adri\u00e1n Rodr\u00edguez-Mu\u00f1oz","orcid":"https://orcid.org/0000-0002-4130-7696"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rodr\u00edguez-Mu\u00f1oz, Adri\u00e1n","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118833941","display_name":"William Daspit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daspit, William","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006626709","display_name":"Adam R. Klivans","orcid":"https://orcid.org/0000-0001-6960-2235"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Klivans, Adam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085020955","display_name":"Antonio Torralba","orcid":"https://orcid.org/0000-0003-4915-0256"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Torralba, Antonio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123666001","display_name":"Constantinos Daskalakis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daskalakis, Constantinos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5005479839","display_name":"Giannis Daras","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daras, Giannis","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5010236653"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8245999813079834,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8245999813079834,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.03920000046491623,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.033399999141693115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.36559998989105225},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.36559998989105225},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3522000014781952},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3522000014781952},{"id":"https://openalex.org/keywords/refining","display_name":"Refining (metallurgy)","score":0.34279999136924744},{"id":"https://openalex.org/keywords/refining","display_name":"Refining (metallurgy)","score":0.34279999136924744},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3391000032424927},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3391000032424927},{"id":"https://openalex.org/keywords/noisy-data","display_name":"Noisy data","score":0.3345000147819519},{"id":"https://openalex.org/keywords/noisy-data","display_name":"Noisy data","score":0.3345000147819519}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6841999888420105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6841999888420105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44510000944137573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44510000944137573},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4431999921798706},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4431999921798706},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38600000739097595},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38600000739097595},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3562999963760376},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3562999963760376},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3522000014781952},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3522000014781952},{"id":"https://openalex.org/C60044698","wikidata":"https://www.wikidata.org/wiki/Q1283324","display_name":"Refining (metallurgy)","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C60044698","wikidata":"https://www.wikidata.org/wiki/Q1283324","display_name":"Refining (metallurgy)","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25999999046325684},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25999999046325684}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.15417","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.15417","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.797221839427948,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,42,119],"propose":[1,43],"Ambient":[2,95,103],"Dataloops,":[3],"an":[4],"iterative":[5],"framework":[6,128],"for":[7,14,98,125],"refining":[8],"datasets":[9,24],"that":[10,129],"makes":[11],"it":[12],"easier":[13],"diffusion":[15],"models":[16],"to":[17],"learn":[18],"the":[19,54,61,75,89,126,131,134],"underlying":[20],"data":[21,37,135],"distribution.":[22],"Modern":[23],"contain":[25],"samples":[26,78],"of":[27,51,133],"highly":[28],"varying":[29],"quality,":[30,59],"and":[31,60,92,110,114],"training":[32],"directly":[33],"on":[34],"such":[35],"heterogeneous":[36],"often":[38],"yields":[39],"suboptimal":[40],"models.":[41],"a":[44,83,122],"dataset-model":[45],"co-evolution":[46],"process;":[47],"at":[48,70,82],"each":[49,71],"iteration":[50],"our":[52],"method,":[53],"dataset":[55],"becomes":[56],"progressively":[57],"higher":[58],"model":[62],"improves":[63],"accordingly.":[64],"To":[65],"avoid":[66],"destructive":[67],"self-consuming":[68],"loops,":[69],"generation,":[72],"we":[73,93],"treat":[74],"synthetically":[76],"improved":[77],"as":[79],"noisy,":[80],"but":[81],"slightly":[84],"lower":[85],"noisy":[86],"level":[87],"than":[88],"previous":[90],"iteration,":[91],"use":[94],"Diffusion":[96],"techniques":[97],"learning":[99],"under":[100],"corruption.":[101],"Empirically,":[102],"Dataloops":[104],"achieve":[105],"state-of-the-art":[106],"performance":[107],"in":[108],"unconditional":[109],"text-conditional":[111],"image":[112],"generation":[113],"de":[115],"novo":[116],"protein":[117],"design.":[118],"further":[120],"provide":[121],"theoretical":[123],"justification":[124],"proposed":[127],"captures":[130],"benefits":[132],"looping":[136],"procedure.":[137]},"counts_by_year":[],"updated_date":"2026-01-24T23:27:35.965710","created_date":"2026-01-24T00:00:00"}
