{"id":"https://openalex.org/W7161033828","doi":"https://doi.org/10.48550/arxiv.2605.12464","title":"Search Your Block Floating Point Scales!","display_name":"Search Your Block Floating Point Scales!","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161033828","doi":"https://doi.org/10.48550/arxiv.2605.12464"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12464","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12464","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12464","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063233370","display_name":"Tanmaey Gupta","orcid":"https://orcid.org/0009-0004-2944-1632"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gupta, Tanmaey","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116176184","display_name":"Hayden Prairie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prairie, Hayden","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136025582","display_name":"Xiaoxia Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Xiaoxia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082159350","display_name":"Reyna Abhyankar","orcid":"https://orcid.org/0009-0005-6763-0108"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abhyankar, Reyna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136035403","display_name":"Qingyang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Qingyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120326613","display_name":"Austin Silveria","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silveria, Austin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014477939","display_name":"Pragaash Ponnusamy","orcid":"https://orcid.org/0000-0002-3790-5757"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ponnusamy, Pragaash","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136035024","display_name":"Jue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136055100","display_name":"Ben Athiwaratkun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Athiwaratkun, Ben","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059669640","display_name":"Leon Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Leon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136059673","display_name":"Tri Dao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dao, Tri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032865467","display_name":"Daniel Y. Fu","orcid":"https://orcid.org/0000-0002-2500-2577"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Daniel Y.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127875884","display_name":"Chris De Sa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Sa, Chris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5063233370"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6139000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6139000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.04470000043511391,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.02800000086426735,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8592000007629395},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6093999743461609},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5874999761581421},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.41620001196861267},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4115999937057495},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.40939998626708984}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8592000007629395},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6222000122070312},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6093999743461609},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5874999761581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5419999957084656},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.41620001196861267},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4115999937057495},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.40939998626708984},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.388700008392334},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.3634999990463257},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.27869999408721924},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C5317259","wikidata":"https://www.wikidata.org/wiki/Q4462361","display_name":"Trellis quantization","level":5,"score":0.26589998602867126},{"id":"https://openalex.org/C61005703","wikidata":"https://www.wikidata.org/wiki/Q2145211","display_name":"Round-off error","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12464","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12464","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12464","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12464","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7792232632637024,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quantization":[0,111],"has":[1],"emerged":[2],"as":[3,108],"a":[4,40,80],"standard":[5],"technique":[6],"for":[7,10,29,74,95,143,156,168,180],"accelerating":[8],"inference":[9],"generative":[11],"models":[12],"by":[13,154,163,176],"enabling":[14],"faster":[15],"low-precision":[16,113],"computations":[17],"and":[18,112,115,134,158],"reduced":[19],"memory":[20],"transfers.":[21],"Recently,":[22],"GPU":[23],"accelerators":[24],"have":[25],"added":[26],"first-class":[27],"support":[28],"microscaling":[30,88],"Block":[31],"Floating":[32],"Point":[33],"(BFP)":[34],"formats.":[35],"Standard":[36],"BFP":[37],"algorithms":[38],"use":[39],"fixed":[41],"scale":[42,55,77],"based":[43],"on":[44],"the":[45,49,84,92,96],"maximum":[46],"magnitude":[47],"of":[48],"block.":[50],"We":[51],"observe":[52],"that":[53,149],"this":[54,66],"choice":[56],"can":[57,100],"be":[58,101],"suboptimal":[59],"with":[60,103],"respect":[61],"to":[62,90,118,138,165],"quantization":[63,93,105,152,193],"errors.":[64],"In":[65],"work,":[67],"we":[68,123],"propose":[69],"ScaleSearch,":[70],"an":[71,126],"alternative":[72],"strategy":[73],"selecting":[75],"these":[76],"factors:":[78],"using":[79],"fine-grained":[81],"search":[82],"leveraging":[83],"mantissa":[85],"bits":[86],"in":[87],"formats":[89],"minimize":[91],"error":[94,153],"given":[97],"distribution.":[98],"ScaleSearch":[99,133,150],"integrated":[102],"existing":[104],"methods":[106,186],"such":[107],"Post":[109],"Training":[110],"attention,":[114],"is":[116],"shown":[117],"improve":[119],"their":[120],"performance.":[121],"Additionally,":[122],"introduce":[124],"ScaleSearchAttention,":[125],"accelerated":[127],"NVFP4-based":[128],"attention":[129],"algorithm,":[130],"which":[131],"uses":[132],"adapted":[135],"prior":[136],"techniques":[137],"ensure":[139],"near-0":[140],"performance":[141,190],"loss":[142],"causal":[144],"language":[145,160],"modeling.":[146],"Experiments":[147],"show":[148],"reduces":[151],"27%":[155],"NVFP4":[157],"improves":[159,173],"model":[161],"PTQ":[162],"up":[164],"15":[166],"points":[167,179],"MATH500":[169],"(Qwen3-8B),":[170],"while":[171,191],"ScaleSearchAttention":[172],"Wikitext-2":[174],"PPL":[175],"upto":[177],"0.77":[178],"Llama":[181],"3.1":[182],"70B.":[183],"The":[184],"proposed":[185],"closely":[187],"match":[188],"baseline":[189],"providing":[192],"accuracy":[194],"improvements.":[195]},"counts_by_year":[],"updated_date":"2026-05-14T06:16:12.342656","created_date":"2026-05-14T00:00:00"}
