{"id":"https://openalex.org/W4416252178","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228725","title":"k-Odd One Clear (k-OOC), A Novel GPU Kernel That Improves Quantization Accuracy And Speed Of GPTQ Algorithm","display_name":"k-Odd One Clear (k-OOC), A Novel GPU Kernel That Improves Quantization Accuracy And Speed Of GPTQ Algorithm","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252178","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228725"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044859809","display_name":"L. Huynh","orcid":null},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Loi Huynh","raw_affiliation_strings":["Auburn University,Department of Computer Science and Software Engineering,Auburn,AL"],"affiliations":[{"raw_affiliation_string":"Auburn University,Department of Computer Science and Software Engineering,Auburn,AL","institution_ids":["https://openalex.org/I82497590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5044859809"],"corresponding_institution_ids":["https://openalex.org/I82497590"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34610658,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.31869998574256897,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.31869998574256897,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.18160000443458557,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.1454000025987625,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8051999807357788},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.7027999758720398},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5835999846458435},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.43160000443458557},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41909998655319214},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.38350000977516174},{"id":"https://openalex.org/keywords/clarity","display_name":"CLARITY","score":0.3515999913215637}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8051999807357788},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7973999977111816},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.7027999758720398},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5835999846458435},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5728999972343445},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.43160000443458557},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41909998655319214},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.38350000977516174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3716000020503998},{"id":"https://openalex.org/C2777146004","wikidata":"https://www.wikidata.org/wiki/Q14949826","display_name":"CLARITY","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3393999934196472},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3343999981880188},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.3066999912261963},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.29820001125335693},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2780580889","wikidata":"https://www.wikidata.org/wiki/Q41363","display_name":"Panorama","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27900001406669617},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.265500009059906},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.257999986410141},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W1632114991","https://openalex.org/W2300242332","https://openalex.org/W3202442802","https://openalex.org/W4213016094"],"related_works":[],"abstract_inverted_index":{"The":[0],"Large":[1],"Language":[2],"Model":[3],"(LLM)":[4],"demonstrated":[5],"tremendously":[6],"useful":[7],"applications":[8],"in":[9,132],"nowadays":[10],"fast-evolving":[11],"AI-driven":[12],"technology.":[13],"As":[14],"model":[15,40,90],"sizes":[16],"grow":[17],"larger,":[18],"bigger":[19],"and":[20,56,118,169,188,208,214,219,233],"faster":[21,161,171],"GPUs":[22],"are":[23,62],"required.":[24],"Another":[25],"way":[26,127],"to":[27,77,82,111,136,143,180,211,223,241],"alleviate":[28],"this":[29,67],"issue":[30],"is":[31],"by":[32,231,238],"improving":[33],"the":[34,38,71,84,89,102,113,133,139,144,163,173,181,193,225,228,235,242],"compression":[35,114],"quality":[36],"of":[37,116,128,183,196,227,234,244],"trained":[39],"through":[41],"quantization":[42,64,85,120,140],"so":[43,87],"that":[44,88],"lower":[45],"VRAM":[46],"devices":[47],"can":[48,91,158],"run.":[49],"Quantization":[50],"paradigms":[51],"such":[52,204],"as":[53,205],"GPTQ,":[54],"PB-LLM,":[55],"BiLLM":[57],"(Hessian-based":[58],"with":[59,200],"structural":[60],"searching)":[61],"successful":[63],"mechanisms.":[65],"In":[66,98],"paper,":[68],"we":[69,100,148],"propose":[70],"OOC":[72,175],"technique,":[73],"which":[74],"enables":[75],"us":[76],"pick":[78],"an":[79],"\"odd\"":[80],"group":[81],"improve":[83,224],"clarity":[86],"have":[92],"a":[93,124,150],"better":[94],"reasoning":[95],"ability":[96],"overall.":[97],"addition,":[99],"define":[101],"Bit":[103],"Family":[104],"(A<sup":[105],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[106,109],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">lim</sup>,":[107],"A<sup":[108],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">max</sup>)":[110],"classify":[112],"rate":[115],"current":[117],"past":[119],"techniques,":[121],"thus":[122],"providing":[123],"more":[125],"objective":[126],"ranking":[129],"different":[130],"methodologies":[131],"literature.":[134],"Thirdly,":[135],"avoid":[137],"compromising":[138],"speed":[141],"due":[142,179],"scanning":[145],"process":[146],"overhead,":[147],"developed":[149],"specialized":[151],"fused":[152],"GPU":[153],"kernel":[154],"(k-OOC)":[155],"where":[156],"it":[157],"be":[159],"9x":[160],"than":[162,172],"original":[164],"GPTQ":[165,245],"implementation":[166,176],"(single-flow":[167],"mode)":[168,178],"22x":[170],"naive":[174],"(double-flow":[177],"incorporation":[182],"techniques":[184],"called":[185],"Row-Flow-Selection":[186],"Parallel":[187],"Input":[189],"Batching.":[190],"We":[191,221],"measured":[192],"perplexity":[194],"(PPL)":[195],"k-OOC":[197],"(2":[198,246],"bits)":[199],"14":[201],"major":[202],"models":[203,230,237],"OPT,":[206],"LLAMA,":[207],"Bloom":[209],"(125M":[210],"70B":[212],"parameters)":[213],"popular":[215],"datasets":[216],"(Wikitext2,":[217],"C4,":[218],"PTB).":[220],"managed":[222],"PPL":[226],"small":[229],"8.9%":[232],"big":[236],"4.1%":[239],"compared":[240],"baseline":[243],"bits).":[247]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
