{"id":"https://openalex.org/W4416037036","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.293","title":"AQuilt: Weaving Logic and Self-Inspection into Low-Cost, High-Relevance Data Synthesis for Specialist LLMs","display_name":"AQuilt: Weaving Logic and Self-Inspection into Low-Cost, High-Relevance Data Synthesis for Specialist LLMs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037036","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.293"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.293","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.293","pdf_url":"https://aclanthology.org/2025.emnlp-main.293.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.293.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073700416","display_name":"Xiaopeng Ke","orcid":"https://orcid.org/0009-0006-0039-4013"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xiaopeng Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043368238","display_name":"Hexuan Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hexuan Deng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029551526","display_name":"Xuebo Liu","orcid":"https://orcid.org/0000-0001-6370-2868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuebo Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056432044","display_name":"Jun Rao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Rao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045795037","display_name":"Zhenxi Song","orcid":"https://orcid.org/0000-0001-8574-0857"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenxi Song","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054413430","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0003-0575-9334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5055013560","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-8186-9159"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5073700416"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31834959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5763","last_page":"5796"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.1103999987244606,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.1103999987244606,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.10249999910593033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.06520000100135803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/weaving","display_name":"Weaving","score":0.7537000179290771},{"id":"https://openalex.org/keywords/craft","display_name":"Craft","score":0.27160000801086426},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.26930001378059387},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.2676999866962433}],"concepts":[{"id":"https://openalex.org/C54525549","wikidata":"https://www.wikidata.org/wiki/Q2553445","display_name":"Weaving","level":2,"score":0.7537000179290771},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.39250001311302185},{"id":"https://openalex.org/C39549134","wikidata":"https://www.wikidata.org/wiki/Q133080","display_name":"Public relations","level":1,"score":0.3797000050544739},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.36169999837875366},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3239000141620636},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C110354214","wikidata":"https://www.wikidata.org/wiki/Q6314146","display_name":"Engineering management","level":1,"score":0.2840999960899353},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.2824999988079071},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.27639999985694885},{"id":"https://openalex.org/C2779732396","wikidata":"https://www.wikidata.org/wiki/Q2207288","display_name":"Craft","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.26030001044273376},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.2590000033378601},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.2563000023365021},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.2538999915122986},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.293","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.293","pdf_url":"https://aclanthology.org/2025.emnlp-main.293.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.293","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.293","pdf_url":"https://aclanthology.org/2025.emnlp-main.293.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5662869619","display_name":null,"funder_award_id":"2024A1515011491","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037036.pdf","grobid_xml":"https://content.openalex.org/works/W4416037036.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"the":[1,139],"impressive":[2],"performance":[3,47],"of":[4,117,138],"large":[5],"language":[6],"models":[7],"(LLMs)":[8],"in":[9,15],"general":[10],"domains,":[11],"they":[12],"often":[13],"underperform":[14],"specialized":[16,71],"domains.Existing":[17],"approaches":[18],"typically":[19],"rely":[20],"on":[21],"data":[22,32,68,106,124,147],"synthesis":[23,125],"methods":[24,38],"and":[25,84,89,95,156],"yield":[26],"promising":[27],"results":[28],"by":[29],"using":[30],"unlabeled":[31,75],"to":[33,97,120,132,151],"capture":[34],"domain-specific":[35],"features.However,":[36],"these":[37,58],"either":[39],"incur":[40],"high":[41],"computational":[42],"costs":[43],"or":[44],"suffer":[45],"from":[46,73],"limitations,":[48],"while":[49,134],"also":[50],"demonstrating":[51],"insufficient":[52],"generalization":[53],"across":[54],"different":[55],"tasks.To":[56],"address":[57],"challenges,":[59],"we":[60,91,113],"propose":[61],"AQuilt,":[62],"a":[63,111,115,122],"framework":[64],"for":[65,69,108],"constructing":[66],"instruction-tuning":[67],"any":[70,109],"domains":[72],"corresponding":[74],"data,":[76,81],"including":[77],"Answer,":[78],"Question,":[79],"Unlabeled":[80],"Inspection,":[82],"Logic,":[83],"Task":[85],"type.By":[86],"incorporating":[87],"logic":[88],"inspection,":[90],"encourage":[92],"reasoning":[93],"processes":[94],"self-inspection":[96],"enhance":[98],"model":[99],"performance.Moreover,":[100],"customizable":[101],"task":[102],"instructions":[103],"enable":[104],"high-quality":[105],"generation":[107],"task.As":[110],"result,":[112],"construct":[114],"dataset":[116],"703k":[118],"examples":[119],"train":[121],"powerful":[123],"model.Experiments":[126],"show":[127],"that":[128,144],"AQuilt":[129],"is":[130],"comparable":[131],"DeepSeek-V3":[133],"utilizing":[135],"just":[136],"17%":[137],"production":[140],"cost.Further":[141],"analysis":[142],"demonstrates":[143],"our":[145],"generated":[146],"exhibits":[148],"higher":[149],"relevance":[150],"downstream":[152],"tasks.Source":[153],"code,":[154],"models,":[155],"scripts":[157],"are":[158],"available":[159],"at":[160],"https://github.com/":[161],"Krueske/AQuilt.":[162]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
