{"id":"https://openalex.org/W7161172327","doi":"https://doi.org/10.1109/dcc66757.2026.00071","title":"Data Compression for AI Model Training","display_name":"Data Compression for AI Model Training","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7161172327","doi":"https://doi.org/10.1109/dcc66757.2026.00071"},"language":null,"primary_location":{"id":"doi:10.1109/dcc66757.2026.00071","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dcc66757.2026.00071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Data Compression Conference (DCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100290424","display_name":"Dov Kruger","orcid":null},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dov Kruger","raw_affiliation_strings":["Rutgers University,Piscataway,NJ,USA,08854"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rutgers University,Piscataway,NJ,USA,08854","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050709486","display_name":"Yulia Kumar","orcid":"https://orcid.org/0000-0002-7621-2734"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yulia Kumar","raw_affiliation_strings":["Rutgers University,Piscataway,NJ,USA,08854"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rutgers University,Piscataway,NJ,USA,08854","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050751559","display_name":"J . Jenny Li","orcid":null},"institutions":[{"id":"https://openalex.org/I47449453","display_name":"Kean University","ror":"https://ror.org/04wzzqn13","country_code":"US","type":"education","lineage":["https://openalex.org/I47449453"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Jenny Li","raw_affiliation_strings":["Kean University,Union,NJ,USA,07083"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kean University,Union,NJ,USA,07083","institution_ids":["https://openalex.org/I47449453"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100290424"],"corresponding_institution_ids":["https://openalex.org/I102322142"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95332768,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"444","last_page":"444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.18930000066757202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.18930000066757202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.06279999762773514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.05429999902844429,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ascii","display_name":"ASCII","score":0.8294000029563904},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.6215999722480774},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5509999990463257},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5070000290870667},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4862000048160553},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4814999997615814},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.4699000120162964},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.43470001220703125},{"id":"https://openalex.org/keywords/bitmap","display_name":"Bitmap","score":0.43230000138282776},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.42910000681877136}],"concepts":[{"id":"https://openalex.org/C196832560","wikidata":"https://www.wikidata.org/wiki/Q8815","display_name":"ASCII","level":2,"score":0.8294000029563904},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7950000166893005},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.6215999722480774},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5509999990463257},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5070000290870667},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4862000048160553},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.4699000120162964},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C3115412","wikidata":"https://www.wikidata.org/wiki/Q1194708","display_name":"Bitmap","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3864000141620636},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37049999833106995},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.3465000092983246},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3449999988079071},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3325999975204468},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.32249999046325684},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C2776235265","wikidata":"https://www.wikidata.org/wiki/Q18392052","display_name":"Fragment (logic)","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29809999465942383},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C63435697","wikidata":"https://www.wikidata.org/wiki/Q864135","display_name":"Binary code","level":3,"score":0.29420000314712524},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C2779190172","wikidata":"https://www.wikidata.org/wiki/Q4913888","display_name":"Binary data","level":3,"score":0.28839999437332153},{"id":"https://openalex.org/C46900642","wikidata":"https://www.wikidata.org/wiki/Q2647","display_name":"Huffman coding","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C171730128","wikidata":"https://www.wikidata.org/wiki/Q5227290","display_name":"Data file","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.26019999384880066},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dcc66757.2026.00071","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dcc66757.2026.00071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Data Compression Conference (DCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"AI":[1],"models":[2],"requires":[3],"ingesting":[4],"massive":[5],"amounts":[6],"of":[7,40,95,201],"training":[8],"data.":[9,190],"We":[10],"define":[11],"Parametric":[12],"Matching":[13],"(PM),":[14],"a":[15,193],"grammar-based":[16],"compression":[17],"technique":[18],"that":[19,42,55,152,160,196],"parses":[20],"structured":[21],"text":[22,62],"into":[23,37,125],"abstract":[24],"syntax":[25],"trees,":[26],"replaces":[27],"format":[28,174],"patterns":[29],"with":[30,136],"compact":[31],"tokens,":[32],"and":[33,63,78,100,121,130,140,185],"moves":[34],"numerical":[35],"parameters":[36],"homogeneous":[38],"streams":[39,54],"tokens":[41],"can":[43,117,122,156,175],"additionally":[44],"be":[45,118,123,176],"bit":[46],"compressed":[47,77],"using":[48],"LZMA.":[49],"The":[50,146],"results":[51,147],"are":[52,56,161,217],"input":[53],"equivalent":[57],"to":[58,82,102,106,169,182,213,220],"the":[59,113,153,172,199,202,211],"original":[60,203],"interleaved":[61],"data":[64,89,158],"in":[65,148,179],"formats":[66],"such":[67,71],"as":[68,72,109],"SVG,":[69,137],"3D-models":[70],"OBJ,":[73],"PDF,":[74],"but":[75,166],"highly":[76],"much":[79,91],"more":[80,128,132,164],"amenable":[81],"processing.":[83],"PM":[84,154],"has":[85],"many":[86],"advantages.":[87],"Loading":[88],"is":[90,197],"faster":[92,105,168],"(a":[93],"factor":[94],"20":[96],"for":[97],"3D":[98],"models,":[99],"100":[101],"200":[103],"times":[104],"load).":[107],"Just":[108],"important,":[110],"by":[111,206],"preprocessing":[112],"semantics,":[114],"ambiguous":[115],"cases":[116],"resolved":[119],"once,":[120],"turned":[124],"embedding":[126],"vectors":[127],"efficiently,":[129],"potentially":[131],"accurately.":[133],"Our":[134],"experiments":[135],"G-Code,":[138],"OBJ":[139],"PDF":[141],"files":[142],"show":[143,151],"large":[144],"compression.":[145],"Table":[149],"1":[150],"algorithm":[155],"create":[157],"objects":[159],"not":[162],"only":[163],"compressible":[165],"far":[167],"load":[170,183],"because":[171,215],"binary":[173,194],"instantly":[177],"used,":[178],"this":[180],"case":[181],"OpenGL":[184],"render":[186],"without":[187],"parsing":[188],"ASCII":[189,204],"When":[191],"compressing":[192],"object":[195],"half":[198,210],"size":[200],"representation,":[205],"definition":[207],"LZMA":[208],"takes":[209],"time":[212],"compress":[214],"there":[216],"fewer":[218],"bytes":[219],"process.":[221]},"counts_by_year":[],"updated_date":"2026-05-16T06:04:12.930555","created_date":"2026-05-15T00:00:00"}
