{"id":"https://openalex.org/W2026326255","doi":"https://doi.org/10.1109/asap.2013.6567571","title":"Sparse matrix-vector multiply on the Texas Instruments C6678 Digital Signal Processor","display_name":"Sparse matrix-vector multiply on the Texas Instruments C6678 Digital Signal Processor","publication_year":2013,"publication_date":"2013-06-01","ids":{"openalex":"https://openalex.org/W2026326255","doi":"https://doi.org/10.1109/asap.2013.6567571","mag":"2026326255"},"language":"en","primary_location":{"id":"doi:10.1109/asap.2013.6567571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2013.6567571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 24th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038207720","display_name":"Yang Gao","orcid":"https://orcid.org/0000-0002-2150-5986"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yang Gao","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001266986","display_name":"Jason D. Bakos","orcid":"https://orcid.org/0000-0002-0821-6258"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason D. Bakos","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, USA","institution_ids":["https://openalex.org/I155781252"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038207720"],"corresponding_institution_ids":["https://openalex.org/I155781252"],"apc_list":null,"apc_paid":null,"fwci":1.2686,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.7988976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"168","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8248478174209595},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.676607608795166},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.6433703899383545},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5525887608528137},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.5463398694992065},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.5336366891860962},{"id":"https://openalex.org/keywords/very-long-instruction-word","display_name":"Very long instruction word","score":0.5233404040336609},{"id":"https://openalex.org/keywords/digital-signal-processor","display_name":"Digital signal processor","score":0.522181510925293},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4921957552433014},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.4172684848308563},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3284795880317688}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8248478174209595},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.676607608795166},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.6433703899383545},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5525887608528137},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.5463398694992065},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.5336366891860962},{"id":"https://openalex.org/C170595534","wikidata":"https://www.wikidata.org/wiki/Q249743","display_name":"Very long instruction word","level":2,"score":0.5233404040336609},{"id":"https://openalex.org/C161611012","wikidata":"https://www.wikidata.org/wiki/Q106370","display_name":"Digital signal processor","level":3,"score":0.522181510925293},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4921957552433014},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4172684848308563},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3284795880317688},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/asap.2013.6567571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2013.6567571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 24th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.364.8986","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.364.8986","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.sc.edu/~jbakos/papers/asap13_paper.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8700000047683716,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310009","display_name":"University of Florida","ror":"https://ror.org/02y3ad647"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1557692423","https://openalex.org/W1768849904","https://openalex.org/W1879455397","https://openalex.org/W1903794615","https://openalex.org/W1975116854","https://openalex.org/W1992809521","https://openalex.org/W2035080386","https://openalex.org/W2038071684","https://openalex.org/W2040556424","https://openalex.org/W2063084220","https://openalex.org/W2065321717","https://openalex.org/W2065770937","https://openalex.org/W2066804546","https://openalex.org/W2071004993","https://openalex.org/W2077012364","https://openalex.org/W2091908315","https://openalex.org/W2103877122","https://openalex.org/W2113921339","https://openalex.org/W2128539477","https://openalex.org/W2128556617","https://openalex.org/W2128853364","https://openalex.org/W2148570406","https://openalex.org/W2148651744","https://openalex.org/W2148701142","https://openalex.org/W2149921618","https://openalex.org/W2254254903","https://openalex.org/W2611119619","https://openalex.org/W4243261006","https://openalex.org/W6659374036","https://openalex.org/W6691950125"],"related_works":["https://openalex.org/W2139795029","https://openalex.org/W2115688358","https://openalex.org/W1973931517","https://openalex.org/W2393747601","https://openalex.org/W1503212777","https://openalex.org/W2796337470","https://openalex.org/W2072728786","https://openalex.org/W2146636354","https://openalex.org/W2066454338","https://openalex.org/W2751061704"],"abstract_inverted_index":{"The":[0],"Texas":[1],"Instruments":[2],"(TI)":[3],"C6678":[4,44],"\u201cShannon\u201d":[5],"is":[6,120,139,147,194],"TI's":[7],"most":[8],"recently-released":[9],"Digital":[10],"Signal":[11],"Processor":[12,84],"(DSP).":[13],"Although":[14],"its":[15,48,99,202,207,229],"original":[16],"purpose":[17],"was":[18],"voice":[19],"and":[20,23,52,68,81,101,109,206,222],"video":[21],"encoding":[22],"decoding,":[24],"it":[25,70],"may":[26,176],"have":[27],"the":[28,43,77,87,95,118,124,145,155,192,213,223],"potential":[29],"to":[30,71,122,196,227],"become":[31],"a":[32,56,61,73,150,160,172],"practical":[33],"coprocessor":[34],"for":[35,189],"scientific":[36],"computing.":[37],"In":[38,215],"this":[39,190,216],"paper,":[40],"we":[41,59,89,218],"evaluate":[42],"in":[45,126,136,149,159,171,182],"terms":[46],"of":[47,92,94,200],"programming":[49,224],"methodology,":[50],"performance,":[51],"power":[53,134,183],"efficiency.":[54,184],"As":[55],"case":[57],"study,":[58],"implemented":[60],"sparse":[62],"matrix":[63],"vector":[64],"multiply":[65],"(SpMV)":[66],"kernel":[67,221],"used":[69],"perform":[72],"comparative":[74],"study":[75],"against":[76],"NVIDIA":[78],"Kepler":[79],"GK104":[80],"GK106":[82],"Graphical":[83],"Units.":[85],"On":[86],"DSP,":[88],"take":[90],"advantage":[91],"many":[93],"C6678's":[96],"features,":[97],"including":[98],"VLIW":[100],"SIMD":[102],"instruction":[103],"set":[104],"architecture,":[105],"program-controlled":[106],"scratchpad":[107],"memory,":[108],"direct":[110],"memory":[111,204],"access":[112],"(DMA)":[113],"controller.":[114],"We":[115,164,185],"found":[116,187],"that":[117,144,166],"DSP":[119,146,193],"unable":[121],"outperform":[123],"GPUs":[125,156,181],"raw":[127],"performance":[128],"but":[129],"can":[130],"achieve":[131,197],"roughly":[132],"equal":[133],"efficiency":[135],"Gflops/Watt.":[137],"This":[138],"more":[140,178],"impressive":[141],"when":[142,169],"considering":[143],"manufactured":[148,158,170],"45":[151],"nm":[152,162],"process":[153],"while":[154],"are":[157],"28":[161],"process.":[163],"believe":[165],"subsequent":[167],"DSPs,":[168],"modern":[173],"fabrication":[174],"process,":[175],"be":[177],"competitive":[179],"with":[180,212],"also":[186],"that,":[188],"kernel,":[191],"able":[195],"higher":[198],"utilization":[199],"both":[201],"peak":[203],"bandwidth":[205],"functional":[208],"units":[209],"as":[210],"compared":[211],"GPUs.":[214],"paper":[217],"describe":[219],"our":[220],"techniques":[225],"required":[226],"optimize":[228],"performance.":[230]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
