{"id":"https://openalex.org/W7137814876","doi":"https://doi.org/10.1609/aaai.v40i27.39379","title":"Optimized Algorithms for Text Clustering with LLM-Generated Constraints","display_name":"Optimized Algorithms for Text Clustering with LLM-Generated Constraints","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137814876","doi":"https://doi.org/10.1609/aaai.v40i27.39379"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i27.39379","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39379","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39379/43340","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39379/43340","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123332036","display_name":"Chaoqi Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135439","display_name":"RMIT Europe","ror":"https://ror.org/03m3ca021","country_code":"ES","type":"education","lineage":["https://openalex.org/I4210135439","https://openalex.org/I82951845"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Chaoqi Jia","raw_affiliation_strings":["Royal Melbourne Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Royal Melbourne Institute of Technology","institution_ids":["https://openalex.org/I4210135439"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123347466","display_name":"Weihong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihong Wu","raw_affiliation_strings":["Fuzhou University"],"affiliations":[{"raw_affiliation_string":"Fuzhou University","institution_ids":["https://openalex.org/I80947539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121753410","display_name":"Longkun Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longkun Guo","raw_affiliation_strings":["Fuzhou University"],"affiliations":[{"raw_affiliation_string":"Fuzhou University","institution_ids":["https://openalex.org/I80947539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123346059","display_name":"Zhigang Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I63525965","display_name":"Western Sydney University","ror":"https://ror.org/03t52dk35","country_code":"AU","type":"education","lineage":["https://openalex.org/I63525965"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhigang Lu","raw_affiliation_strings":["Western Sydney University"],"affiliations":[{"raw_affiliation_string":"Western Sydney University","institution_ids":["https://openalex.org/I63525965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129697602","display_name":"Chao Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135439","display_name":"RMIT Europe","ror":"https://ror.org/03m3ca021","country_code":"ES","type":"education","lineage":["https://openalex.org/I4210135439","https://openalex.org/I82951845"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Chao Chen","raw_affiliation_strings":["Royal Melbourne Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Royal Melbourne Institute of Technology","institution_ids":["https://openalex.org/I4210135439"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025217867","display_name":"Kok-leong Ong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135439","display_name":"RMIT Europe","ror":"https://ror.org/03m3ca021","country_code":"ES","type":"education","lineage":["https://openalex.org/I4210135439","https://openalex.org/I82951845"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Kok-Leong Ong","raw_affiliation_strings":["Royal Melbourne Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Royal Melbourne Institute of Technology","institution_ids":["https://openalex.org/I4210135439"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5123332036"],"corresponding_institution_ids":["https://openalex.org/I4210135439"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03249867,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"27","first_page":"22229","last_page":"22237"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.638700008392334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.638700008392334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.05079999938607216,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.03350000083446503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8406999707221985},{"id":"https://openalex.org/keywords/constrained-clustering","display_name":"Constrained clustering","score":0.6365000009536743},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6089000105857849},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.6029000282287598},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.5151000022888184},{"id":"https://openalex.org/keywords/data-stream-clustering","display_name":"Data stream clustering","score":0.4745999872684479},{"id":"https://openalex.org/keywords/resource-consumption","display_name":"Resource consumption","score":0.4650999903678894},{"id":"https://openalex.org/keywords/canopy-clustering-algorithm","display_name":"Canopy clustering algorithm","score":0.45680001378059387}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8406999707221985},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7526000142097473},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6528000235557556},{"id":"https://openalex.org/C27964816","wikidata":"https://www.wikidata.org/wiki/Q5164359","display_name":"Constrained clustering","level":5,"score":0.6365000009536743},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6089000105857849},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.6029000282287598},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.5151000022888184},{"id":"https://openalex.org/C193143536","wikidata":"https://www.wikidata.org/wiki/Q5227360","display_name":"Data stream clustering","level":5,"score":0.4745999872684479},{"id":"https://openalex.org/C2777480716","wikidata":"https://www.wikidata.org/wiki/Q23582796","display_name":"Resource consumption","level":2,"score":0.4650999903678894},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.45680001378059387},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44110000133514404},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.38089999556541443},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.36169999837875366},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.3515999913215637},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3312999904155731},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32260000705718994},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.3190999925136566},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2809000015258789},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C144817290","wikidata":"https://www.wikidata.org/wiki/Q2976575","display_name":"Biclustering","level":5,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i27.39379","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39379","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39379/43340","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i27.39379","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39379","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39379/43340","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6535652875900269,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7677024596","display_name":null,"funder_award_id":"12271098","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321878","display_name":"Natural Science Foundation of Fujian Province","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137814876.pdf","grobid_xml":"https://content.openalex.org/works/W7137814876.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Clustering":[0],"is":[1,54],"a":[2,11,71,104,118,122],"fundamental":[3],"tool":[4],"that":[5,75,152],"has":[6],"garnered":[7],"significant":[8],"interest":[9,56],"across":[10],"wide":[12],"range":[13],"of":[14,34,48,112,142,167],"applications":[15],"including":[16],"text":[17,136],"analysis.":[18],"To":[19],"improve":[20],"clustering":[21,42,59,106,147,156],"accuracy,":[22],"many":[23],"researchers":[24],"have":[25],"proposed":[26],"incorporating":[27],"background":[28],"knowledge,":[29],"typically":[30],"in":[31,57],"the":[32,41,45,110,140,160,165],"form":[33],"must\u2011link":[35],"and":[36,94,121,145],"cannot\u2011link":[37],"constraints,":[38],"to":[39,98,109,125,159],"guide":[40],"process.":[43],"With":[44],"recent":[46],"advent":[47],"large":[49],"language":[50],"models":[51],"(LLMs),":[52],"there":[53],"growing":[55],"improving":[58],"quality":[60],"through":[61],"LLM-based":[62],"automatic":[63],"constraint":[64,81,95,143],"generation.":[65],"In":[66],"this":[67],"paper,":[68],"we":[69],"propose":[70],"novel":[72],"constraint\u2011generation":[73],"approach":[74,133],"reduces":[76],"resource":[77],"consumption":[78],"by":[79,170],"generating":[80],"sets":[82],"rather":[83],"than":[84,172],"using":[85],"traditional":[86],"pairwise":[87],"constraints.":[88,114,129],"This":[89],"improves":[90],"both":[91,139],"query":[92],"efficiency":[93],"accuracy":[96,157],"compared":[97],"state\u2011of\u2011the\u2011art":[99],"methods.":[100],"We":[101,130],"further":[102],"introduce":[103],"constrained":[105],"algorithm":[107],"tailored":[108],"characteristics":[111],"LLM-generated":[113],"Our":[115],"method":[116,154],"incorporates":[117],"confidence":[119],"threshold":[120],"penalty":[123],"mechanism":[124],"address":[126],"potentially":[127],"inaccurate":[128],"evaluate":[131],"our":[132,153],"on":[134],"five":[135],"datasets,":[137],"considering":[138],"cost":[141],"generation":[144],"overall":[146],"performance.":[148],"The":[149],"results":[150],"show":[151],"achieves":[155],"comparable":[158],"state-of-the-art":[161],"algorithms":[162],"while":[163],"reducing":[164],"number":[166],"LLM":[168],"queries":[169],"more":[171],"20":[173],"times.":[174]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
