{"id":"https://openalex.org/W4244655784","doi":"https://doi.org/10.1109/icpp.2004.1327902","title":"Improving load/store queues usage in scientific computing","display_name":"Improving load/store queues usage in scientific computing","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W4244655784","doi":"https://doi.org/10.1109/icpp.2004.1327902"},"language":"en","primary_location":{"id":"doi:10.1109/icpp.2004.1327902","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpp.2004.1327902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conference on Parallel Processing, 2004. ICPP 2004.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/inria-00637256","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036980800","display_name":"C. Lemuet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C. Lemuet","raw_affiliation_strings":["PRiSM Laboratory, University of Versailles, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PRiSM Laboratory, University of Versailles, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003305069","display_name":"W. Jalby","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"W. Jalby","raw_affiliation_strings":["PRiSM Laboratory, University of Versailles, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PRiSM Laboratory, University of Versailles, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013591794","display_name":"Sid Touati","orcid":"https://orcid.org/0000-0002-7295-4474"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"S.-A.-A. Touati","raw_affiliation_strings":["PRiSM Laboratory, University of Versailles, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PRiSM Laboratory, University of Versailles, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.06,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.7846849,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"45 vol.1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8677489161491394},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7106688618659973},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5638892650604248},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.4974067509174347},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.43806740641593933},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4147435128688812},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17237040400505066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8677489161491394},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7106688618659973},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5638892650604248},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.4974067509174347},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.43806740641593933},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4147435128688812},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17237040400505066}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icpp.2004.1327902","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpp.2004.1327902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conference on Parallel Processing, 2004. ICPP 2004.","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:inria-00637256v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00637256","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1327902&tag=1","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:inria-00637256v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00637256","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1327902&tag=1","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1499323045","https://openalex.org/W2151635523","https://openalex.org/W4232059819","https://openalex.org/W4237625289","https://openalex.org/W4241596787","https://openalex.org/W4244227015","https://openalex.org/W4253546582"],"related_works":["https://openalex.org/W1595672120","https://openalex.org/W4230999561","https://openalex.org/W2133682266","https://openalex.org/W2497617944","https://openalex.org/W2167303720","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W2268996566"],"abstract_inverted_index":{"Memory":[0],"disambiguation":[1,26,96],"mechanisms,":[2],"coupled":[3],"with":[4],"load/store":[5,150],"queues":[6],"in":[7,90,111,154],"out-of-order":[8],"processors,":[9,87],"are":[10,109,211],"crucial":[11],"to":[12,125,148],"increase":[13],"instruction":[14],"level":[15],"parallelism":[16],"(ILP),":[17],"especially":[18],"for":[19],"memory-bound":[20],"scientific":[21,215],"codes.":[22],"Designing":[23],"ideal":[24],"memory":[25,94,140],"mechanisms":[27,97],"is":[28,132,185,189],"too":[29],"complex":[30],"because":[31],"it":[32],"would":[33],"require":[34],"precise":[35],"address":[36,51,95],"bits":[37],"comparators;":[38],"thus,":[39],"modern":[40],"microprocessors":[41],"implement":[42],"simplified":[43],"and":[44,77,115],"imprecise":[45],"ones":[46],"that":[47,72,93,180,210],"perform":[48],"only":[49],"partial":[50],"comparisons.":[52],"In":[53],"this":[54,91],"paper,":[55],"we":[56,88,178],"study":[57],"the":[58,64,82,119,135,192,201],"impact":[59],"of":[60,67,85,137,143,203,213],"such":[61,71,204],"simplifications":[62],"on":[63,134,191,206],"sustained":[65],"performance":[66,101,120],"some":[68],"real":[69],"processors":[70],"Alpha":[73],"21264,":[74],"Power":[75],"4":[76],"Itanium":[78],"2.":[79],"Despite":[80],"all":[81],"advanced":[83],"features":[84],"these":[86],"demonstrate":[89,104],"article":[92],"can":[98,122],"cause":[99],"significant":[100],"loss.":[102],"We":[103],"that,":[105],"even":[106],"if":[107,129],"data":[108],"located":[110],"low":[112],"cache":[113],"levels":[114],"enough":[116],"ILP":[117],"exist,":[118],"degradation":[121],"be":[123],"up":[124],"21":[126],"times":[127],"slower":[128],"no":[130],"care":[131],"taken":[133],"order":[136],"accessing":[138],"independent":[139],"addresses.":[141],"Instead":[142],"proposing":[144],"a":[145,181],"hardware":[146],"solution":[147,188],"improve":[149],"queues,":[151],"as":[152],"done":[153],"[G.":[155],"Chrysos":[156],"et":[157,162,167,172],"al.,":[158,163,168,173],"(1998),":[159],"S.":[160,175],"Sethumadhavan":[161],"(2003),":[164,169],"I.":[165],"Park":[166],"A.":[170],"Yoaz":[171],"(1999),":[174],"Onder":[176],"(2002)],":[177],"show":[179],"software":[182],"(compilation)":[183],"technique":[184],"possible.":[186],"Such":[187],"based":[190],"classical":[193],"(and":[194],"robust)":[195],"Id/st":[196],"vectorization.":[197],"Our":[198],"experiments":[199],"highlight":[200],"effectiveness":[202],"method":[205],"BLAS":[207],"1":[208],"codes":[209],"representative":[212],"vector":[214],"loops.":[216]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
