{"id":"https://openalex.org/W4414360219","doi":"https://doi.org/10.24963/ijcai.2025/902","title":"Exploring the Trade-Offs: Quantization Methods, Task Difficulty, and Model Size in Large Language Models From Edge to Giant","display_name":"Exploring the Trade-Offs: Quantization Methods, Task Difficulty, and Model Size in Large Language Models From Edge to Giant","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360219","doi":"https://doi.org/10.24963/ijcai.2025/902"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/902","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101558196","display_name":"Jemin Lee","orcid":"https://orcid.org/0000-0002-9332-3508"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jemin Lee","raw_affiliation_strings":["Electronics and Telecommunications Research Institute"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006627490","display_name":"Sihyeong Park","orcid":"https://orcid.org/0000-0001-8244-4817"},"institutions":[{"id":"https://openalex.org/I4210131650","display_name":"Korea Electronics Technology Institute","ror":"https://ror.org/039k6f508","country_code":"KR","type":"facility","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210089395","https://openalex.org/I4210131650"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sihyeong Park","raw_affiliation_strings":["Korea Electronics Technology Institute"],"affiliations":[{"raw_affiliation_string":"Korea Electronics Technology Institute","institution_ids":["https://openalex.org/I4210131650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062228838","display_name":"Jinse Kwon","orcid":"https://orcid.org/0000-0003-3091-9926"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jinse Kwon","raw_affiliation_strings":["Electronics and Telecommunications Research Institute"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090271472","display_name":"Jihun Oh","orcid":"https://orcid.org/0000-0001-6465-6736"},"institutions":[{"id":"https://openalex.org/I4210104803","display_name":"International University of Business and Law","ror":"https://ror.org/01ewram45","country_code":"UA","type":"education","lineage":["https://openalex.org/I4210104803"]}],"countries":["UA"],"is_corresponding":false,"raw_author_name":"Jihun Oh","raw_affiliation_strings":["Neubla"],"affiliations":[{"raw_affiliation_string":"Neubla","institution_ids":["https://openalex.org/I4210104803"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080783573","display_name":"Yongin Kwon","orcid":"https://orcid.org/0000-0003-2973-246X"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yongin Kwon","raw_affiliation_strings":["Electronics and Telecommunications Research Institute"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute","institution_ids":["https://openalex.org/I142401562"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101558196"],"corresponding_institution_ids":["https://openalex.org/I142401562"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13984742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8113","last_page":"8121"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8694000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8694000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7520999908447266,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.8747000098228455},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6866000294685364},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6180999875068665},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4902999997138977},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.39329999685287476},{"id":"https://openalex.org/keywords/predictive-coding","display_name":"Predictive coding","score":0.35109999775886536},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.334199994802475}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.8747000098228455},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6866000294685364},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6865000128746033},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6180999875068665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.555899977684021},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4902999997138977},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4828000068664551},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C2778061373","wikidata":"https://www.wikidata.org/wiki/Q1315146","display_name":"Predictive coding","level":3,"score":0.35109999775886536},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3481000065803528},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.334199994802475},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.2978000044822693},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.26660001277923584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/902","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quantization":[0],"has":[1,22],"gained":[2],"attention":[3],"as":[4,89],"a":[5,33,46,138],"promising":[6],"solution":[7],"for":[8],"the":[9,90,130],"cost-effective":[10],"deployment":[11],"of":[12,36,49],"large":[13],"and":[14,31,82,96,149,161],"small":[15],"language":[16],"models.":[17],"However,":[18],"most":[19,91],"prior":[20],"work":[21],"been":[23],"limited":[24],"to":[25,54,99],"perplexity":[26],"or":[27],"basic":[28],"knowledge":[29],"tasks":[30,125],"lacks":[32],"comprehensive":[34,47],"evaluation":[35,48],"recent":[37],"models":[38,51,70,107,118],"like":[39],"Llama-3.3.":[40],"In":[41],"this":[42],"paper,":[43],"we":[44],"conduct":[45],"instruction-tuned":[50],"spanning":[52],"1B":[53],"405B":[55],"parameters,":[56],"applying":[57],"four":[58],"quantization":[59,136],"methods":[60],"across":[61,94],"13":[62],"datasets.":[63],"Our":[64],"findings":[65],"reveal":[66],"that":[67,135],"(1)":[68],"quantized":[69],"generally":[71],"surpass":[72],"smaller":[73,106],"FP16":[74],"baselines,":[75],"yet":[76],"they":[77],"often":[78],"struggle":[79],"with":[80,146],"instruction-following":[81],"hallucination":[83],"detection;":[84],"(2)":[85],"FP8":[86],"consistently":[87],"emerges":[88],"robust":[92],"option":[93],"tasks,":[95,163],"AWQ":[97],"tends":[98],"outperform":[100],"GPTQ":[101],"in":[102,159,169],"weight-only":[103],"quantization;":[104],"(3)":[105],"can":[108],"suffer":[109],"severe":[110],"accuracy":[111,132],"drops":[112],"at":[113],"4-bit":[114],"quantization,":[115],"while":[116],"70B-scale":[117],"maintain":[119],"stable":[120],"performance;":[121],"(4)":[122],"notably,":[123],"\\textit{hard}":[124],"do":[126],"not":[127],"always":[128],"experience":[129],"largest":[131],"losses,":[133],"indicating":[134],"magnifies":[137],"model\u2019s":[139],"inherent":[140],"weaknesses":[141],"rather":[142],"than":[143],"simply":[144],"correlating":[145],"task":[147],"difficulty;":[148],"(5)":[150],"an":[151],"LLM-based":[152],"judge":[153],"(MT-Bench)":[154],"highlights":[155],"significant":[156],"performance":[157],"declines":[158],"Coding":[160],"STEM":[162],"though":[164],"it":[165],"occasionally":[166],"reports":[167],"improvements":[168],"reasoning.":[170]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
