{"id":"https://openalex.org/W7140106362","doi":"https://doi.org/10.18653/v1/2026.eacl-long.7","title":"Beyond Sample-Level Feedback: Using Reference-Level Feedback to Guide Data Synthesis","display_name":"Beyond Sample-Level Feedback: Using Reference-Level Feedback to Guide Data Synthesis","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140106362","doi":"https://doi.org/10.18653/v1/2026.eacl-long.7"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.eacl-long.7","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.7","pdf_url":"https://aclanthology.org/2026.eacl-long.7.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.eacl-long.7.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130398330","display_name":"Shuhaib Mehri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuhaib Mehri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130396207","display_name":"Xiusi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiusi Chen","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130397083","display_name":"Heng Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heng Ji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130356993","display_name":"Dilek Hakkani-T\u00fcr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dilek Hakkani-T\u00fcr","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37429153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"141","last_page":"164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10826","display_name":"Behavioral and Psychological Studies","score":0.061400000005960464,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10826","display_name":"Behavioral and Psychological Studies","score":0.061400000005960464,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11875","display_name":"Statistics Education and Methodologies","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10467","display_name":"Psychometric Methodologies and Testing","score":0.04100000113248825,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.29589998722076416},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.2702000141143799},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.26980000734329224},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.2623000144958496},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.25769999623298645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.571399986743927},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28850001096725464},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.2475000023841858},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.23960000276565552}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2026.eacl-long.7","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.7","pdf_url":"https://aclanthology.org/2026.eacl-long.7.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.eacl-long.7","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.7","pdf_url":"https://aclanthology.org/2026.eacl-long.7.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140106362.pdf","grobid_xml":"https://content.openalex.org/works/W7140106362.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"High-quality":[0],"instruction-tuning":[1],"data":[2,22,42,127],"is":[3],"crucial":[4],"for":[5,28],"developing":[6],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"that":[11,58,109],"can":[12],"effectively":[13],"navigate":[14],"real-world":[15],"tasks":[16],"and":[17,87,122,125],"follow":[18],"human":[19],"instructions.While":[20],"synthetic":[21],"generation":[23],"offers":[24],"a":[25,34,56,80,100],"scalable":[26],"approach":[27],"creating":[29],"such":[30],"datasets,":[31],"it":[32],"imposes":[33],"quality":[35],"ceiling":[36],"where":[37],"models":[38],"trained":[39],"on":[40,89,104],"the":[41,45,69],"cannot":[43],"outperform":[44],"LLM":[46],"generating":[47],"it.To":[48],"overcome":[49],"this":[50,75],"limitation,":[51],"we":[52,77],"introduce":[53],"REFERENCE-LEVEL":[54,110],"FEEDBACK,":[55],"paradigm":[57],"extracts":[59],"desirable":[60],"characteristics":[61],"from":[62],"carefully":[63],"curated":[64],"reference":[65],"samples":[66],"to":[67],"guide":[68],"synthesis":[70],"of":[71,82],"higher-quality":[72],"instruction-response":[73,84],"pairs.Using":[74],"approach,":[76],"synthesize":[78],"REFED,":[79],"dataset":[81],"10K":[83],"pairs.Fine-tuning":[85],"Llama-3.1-8B-Instruct":[86],"Mistral-7B-Instruct":[88],"REFED":[90],"demonstrate":[91,108],"state-of-the-art":[92],"performance":[93],"among":[94],"similarly":[95],"sized":[96],"models,":[97],"notably":[98],"reaching":[99],"43.96%":[101],"length-controlled":[102],"winrate":[103],"AlpacaEval":[105],"2.0.Extensive":[106],"experiments":[107],"FEEDBACK":[111],"consistently":[112],"outperforms":[113],"traditional":[114],"sample-level":[115],"feedback":[116],"methods,":[117],"generalizes":[118],"across":[119],"model":[120],"architectures,":[121],"produces":[123],"high-quality":[124],"diverse":[126],"at":[128],"low":[129],"cost.":[130]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
