{"id":"https://openalex.org/W4415428095","doi":"https://doi.org/10.3233/faia251322","title":"Large Language Models and Normalized Compression Distance: Better Compression Yet Worse Accuracy","display_name":"Large Language Models and Normalized Compression Distance: Better Compression Yet Worse Accuracy","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428095","doi":"https://doi.org/10.3233/faia251322"},"language":null,"primary_location":{"id":"doi:10.3233/faia251322","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251322","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251322","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034493725","display_name":"John Hurwitz","orcid":null},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Hurwitz","raw_affiliation_strings":["University of Maryland, Baltimore County"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland, Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025012064","display_name":"Charles Nicholas","orcid":"https://orcid.org/0000-0001-9494-7139"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charles Nicholas","raw_affiliation_strings":["University of Maryland, Baltimore County"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland, Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068036546","display_name":"Edward Raff","orcid":"https://orcid.org/0000-0002-9900-1972"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward Raff","raw_affiliation_strings":["CrowdStrike","University of Maryland, Baltimore County"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CrowdStrike","institution_ids":[]},{"raw_affiliation_string":"University of Maryland, Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.363,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.9348003,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.7721999883651733},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.7552000284194946},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.7294999957084656},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5654000043869019},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.47679999470710754},{"id":"https://openalex.org/keywords/compression-ratio","display_name":"Compression ratio","score":0.38850000500679016},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.375900000333786},{"id":"https://openalex.org/keywords/data-compression-ratio","display_name":"Data compression ratio","score":0.3596000075340271}],"concepts":[{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.7721999883651733},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.7552000284194946},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7337999939918518},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.7294999957084656},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5654000043869019},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5273000001907349},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.47679999470710754},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4278999865055084},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.38850000500679016},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.375900000333786},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.3596000075340271},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.3522999882698059},{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3222000002861023},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.29440000653266907},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2906000018119812},{"id":"https://openalex.org/C2778192920","wikidata":"https://www.wikidata.org/wiki/Q16874989","display_name":"Signal compression","level":4,"score":0.2791000008583069},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C131097465","wikidata":"https://www.wikidata.org/wiki/Q178898","display_name":"Gas compressor","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.258899986743927},{"id":"https://openalex.org/C32653426","wikidata":"https://www.wikidata.org/wiki/Q3813641","display_name":"Background subtraction","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251322","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251322","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251322","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251322","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"It":[0],"is":[1,108],"generally":[2],"well":[3],"understood":[4],"that":[5,31,97],"predictive":[6],"classification":[7,93],"and":[8,30,61,81],"compression":[9,33,52,113],"are":[10,22],"intrinsically":[11],"related":[12],"concepts":[13],"in":[14,90,134],"information":[15],"theory.":[16],"Indeed,":[17],"many":[18],"deep":[19],"learning":[20,25],"methods":[21],"explained":[23],"as":[24,53],"a":[26,78,91,105],"kind":[27],"of":[28,56],"compression,":[29,126],"better":[32,36,102,125],"leads":[34],"to":[35,84,122,131,137],"performance.":[37],"We":[38,95],"interrogate":[39],"this":[40],"hypothesis":[41],"via":[42],"the":[43,54,116,138],"Normalized":[44],"Compression":[45],"Distance":[46],"(NCD),":[47],"which":[48],"explicitly":[49],"relies":[50],"on":[51],"means":[55],"measuring":[57],"similarity":[58],"between":[59],"sequences":[60],"thus":[62],"enables":[63],"nearest-neighbor":[64],"classification.":[65],"By":[66],"turning":[67],"popular":[68],"large":[69],"language":[70],"models":[71],"(LLMs)":[72],"into":[73],"lossless":[74],"compressors,":[75],"we":[76,127],"develop":[77],"Neural":[79],"NCD":[80],"compare":[82],"LLMs":[83],"classic":[85],"general-purpose":[86],"algorithms":[87],"like":[88],"gzip":[89,106,139],"compression-distance-based":[92],"setting.":[94],"find":[96],"whether":[98],"neural":[99,118],"compressors":[100],"achieve":[101],"accuracy":[103,135],"over":[104],"baseline":[107],"dataset-dependent,":[109],"despite":[110],"consistently":[111],"superior":[112],"ratios.":[114],"Though":[115],"best":[117],"compressor":[119],"achieves":[120],"up":[121,130],"7.49":[123],"times":[124],"observe":[128],"an":[129],"22.2%":[132],"decrease":[133],"relative":[136],"baseline.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-24T00:00:00"}
