{"id":"https://openalex.org/W4401864159","doi":"https://doi.org/10.1145/3637528.3671904","title":"Scaling Training Data with Lossy Image Compression","display_name":"Scaling Training Data with Lossy Image Compression","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401864159","doi":"https://doi.org/10.1145/3637528.3671904"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671904","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671904","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011636210","display_name":"Katherine L. Mentzer","orcid":"https://orcid.org/0000-0002-7710-1841"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Katherine L Mentzer","raw_affiliation_strings":["Granica, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-7710-1841","affiliations":[{"raw_affiliation_string":"Granica, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011999109","display_name":"Andrea Montanari","orcid":"https://orcid.org/0000-0002-0267-8574"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andrea Montanari","raw_affiliation_strings":["Granica, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-0267-8574","affiliations":[{"raw_affiliation_string":"Granica, Mountain View, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011636210"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12609863,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2212","last_page":"2223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lossy-compression","display_name":"Lossy compression","score":0.9191603660583496},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6935702562332153},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.6636210680007935},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.6254217624664307},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5853488445281982},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.569642961025238},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5203544497489929},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4554038941860199},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4261441230773926},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38822680711746216},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3423084616661072},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.2550911605358124},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.11405578255653381},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0959838330745697},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08884444832801819}],"concepts":[{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.9191603660583496},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6935702562332153},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.6636210680007935},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.6254217624664307},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5853488445281982},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.569642961025238},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5203544497489929},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4554038941860199},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4261441230773926},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38822680711746216},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3423084616661072},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2550911605358124},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.11405578255653381},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0959838330745697},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08884444832801819},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671904","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671904","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W2096613063","https://openalex.org/W2194775991","https://openalex.org/W2340897893","https://openalex.org/W2946862972","https://openalex.org/W3007943565","https://openalex.org/W3090091013","https://openalex.org/W3180854701","https://openalex.org/W4255272544","https://openalex.org/W4297833882","https://openalex.org/W4302067267","https://openalex.org/W4306820534","https://openalex.org/W4312933868","https://openalex.org/W6784388680"],"related_works":["https://openalex.org/W3210332869","https://openalex.org/W2547124190","https://openalex.org/W4210785996","https://openalex.org/W4327499886","https://openalex.org/W2385628723","https://openalex.org/W4313046148","https://openalex.org/W3180760233","https://openalex.org/W2096442341","https://openalex.org/W4210455546","https://openalex.org/W3080614128"],"abstract_inverted_index":{"Empirically-determined":[0],"scaling":[1,142],"laws":[2],"have":[3,27],"been":[4,28],"broadly":[5],"successful":[6],"in":[7,74],"predicting":[8],"the":[9,32,90,112,115,146,184,198,223,229,234],"evolution":[10,148],"of":[11,21,34,82,87,92,98,114,135,149,157,232],"large":[12],"machine":[13],"learning":[14],"models":[15,205,220],"with":[16,152,217],"training":[17,136],"data":[18,50,106],"and":[19,49,132,155,174],"number":[20,81,91,156],"parameters.":[22,186],"As":[23],"a":[24,58,63,75,79,85,140,168,212],"consequence,":[25],"they":[26],"useful":[29],"for":[30,171],"optimizing":[31],"allocation":[33],"limited":[35],"resources,":[36],"most":[37],"notably":[38],"compute":[39],"time.In":[40],"certain":[41],"applications,":[42],"storage":[43,134],"space":[44],"is":[45,62],"an":[46],"important":[47],"constraint,":[48],"format":[51,77],"needs":[52],"to":[53,95,128,196,219],"be":[54,101,194],"chosen":[55],"carefully":[56],"as":[57],"consequence.":[59],"Computer":[60],"vision":[61,181],"prominent":[64],"example:":[65],"images":[66,210],"are":[67,71],"inherently":[68],"analog,":[69],"but":[70],"always":[72],"stored":[73],"digital":[76,88],"using":[78,104],"finite":[80],"bits.":[83],"Given":[84],"dataset":[86],"images,":[89,120],"bits":[93,158],"L":[94],"store":[96],"each":[97,122],"them":[99],"can":[100,110,193],"further":[102],"reduced":[103],"lossy":[105,199],"compression.":[107],"This,":[108],"however,":[109],"degrade":[111],"quality":[113],"model":[116,170],"trained":[117,206,221],"on":[118,178,207,222],"such":[119],"since":[121],"example":[123],"has":[124],"lower":[125],"resolution.In":[126],"order":[127],"capture":[129],"this":[130,164,191],"trade-off":[131],"optimize":[133,197],"data,":[137],"we":[138,227],"propose":[139],"'storage":[141],"law'":[143],"that":[144,163,190],"describes":[145],"joint":[147],"test":[150,215],"error":[151,216],"sample":[153],"size":[154],"per":[159],"image.":[160],"We":[161,187],"prove":[162],"law":[165,192],"holds":[166],"within":[167],"stylized":[169],"image":[172],"compression,":[173],"verify":[175],"it":[176],"empirically":[177],"two":[179],"computer":[180],"tasks,":[182],"extracting":[183],"relevant":[185],"then":[188],"show":[189],"used":[195],"compression":[200,235],"level.":[201,236],"At":[202],"given":[203],"storage,":[204],"optimally":[208],"compressed":[209],"present":[211],"significantly":[213],"smaller":[214],"respect":[218],"original":[224],"data.":[225],"Finally,":[226],"investigate":[228],"potential":[230],"benefits":[231],"randomizing":[233]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
