{"id":"https://openalex.org/W2419180118","doi":"https://doi.org/10.1109/tbdata.2016.2576470","title":"Sparse Computation for Large-Scale Data Mining","display_name":"Sparse Computation for Large-Scale Data Mining","publication_year":2016,"publication_date":"2016-06-01","ids":{"openalex":"https://openalex.org/W2419180118","doi":"https://doi.org/10.1109/tbdata.2016.2576470","mag":"2419180118"},"language":"en","primary_location":{"id":"doi:10.1109/tbdata.2016.2576470","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbdata.2016.2576470","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085640376","display_name":"Dorit S. Hochbaum","orcid":"https://orcid.org/0000-0002-2498-0512"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dorit S. Hochbaum","raw_affiliation_strings":["Department of Industrial Engineering and Operations Research, University of California, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering and Operations Research, University of California, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072687277","display_name":"Philipp Baumann","orcid":"https://orcid.org/0000-0002-3286-4474"},"institutions":[{"id":"https://openalex.org/I118564535","display_name":"University of Bern","ror":"https://ror.org/02k7v4d05","country_code":"CH","type":"education","lineage":["https://openalex.org/I118564535"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Philipp Baumann","raw_affiliation_strings":["Department of Business Administration, University of Bern, Bern, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Business Administration, University of Bern, Bern, Switzerland","institution_ids":["https://openalex.org/I118564535"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5085640376"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":11.0479,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.98168977,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"2","issue":"2","first_page":"151","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7499412298202515},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5520352721214294},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5304742455482483},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44922345876693726},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24649721384048462}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7499412298202515},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5520352721214294},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5304742455482483},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44922345876693726},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24649721384048462},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tbdata.2016.2576470","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbdata.2016.2576470","pdf_url":null,"source":{"id":"https://openalex.org/S2491400915","display_name":"IEEE Transactions on Big Data","issn_l":"2332-7790","issn":["2332-7790","2372-2096"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Big Data","raw_type":"journal-article"},{"id":"pmh:oai:boris.unibe.ch:88124","is_oa":false,"landing_page_url":"https://boris.unibe.ch/88124/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401086","display_name":"Bern Open Repository and Information System (University of Bern)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118564535","host_organization_name":"University of Bern","host_organization_lineage":["https://openalex.org/I118564535"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hochbaum, Dorit S.; Baumann, Philipp (2016). Sparse computation for large-scale data mining. IEEE Transactions on Big Data, 2(2), pp. 151-174. Institute of Electrical and Electronics Engineers 10.1109/TBDATA.2016.2576470 &lt;http://dx.doi.org/10.1109/TBDATA.2016.2576470&gt;","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.5,"display_name":"Sustainable cities and communities"}],"awards":[{"id":"https://openalex.org/G7817270897","display_name":null,"funder_award_id":"CMMI-1200592","funder_id":"https://openalex.org/F4320309090","funder_display_name":"Center for Hierarchical Manufacturing, National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320309090","display_name":"Center for Hierarchical Manufacturing, National Science Foundation","ror":"https://ror.org/043trmd87"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1560724230","https://openalex.org/W1566114229","https://openalex.org/W1570448133","https://openalex.org/W1581656968","https://openalex.org/W1599003247","https://openalex.org/W1633659671","https://openalex.org/W1909224475","https://openalex.org/W1967320885","https://openalex.org/W1970576574","https://openalex.org/W1970932011","https://openalex.org/W1982682305","https://openalex.org/W2002445283","https://openalex.org/W2036216970","https://openalex.org/W2038276547","https://openalex.org/W2058871925","https://openalex.org/W2070355323","https://openalex.org/W2090898720","https://openalex.org/W2101737005","https://openalex.org/W2113307832","https://openalex.org/W2117401405","https://openalex.org/W2119387367","https://openalex.org/W2119821739","https://openalex.org/W2121947440","https://openalex.org/W2122111042","https://openalex.org/W2122792499","https://openalex.org/W2129018774","https://openalex.org/W2129575457","https://openalex.org/W2132914434","https://openalex.org/W2143582647","https://openalex.org/W2147717514","https://openalex.org/W2147965279","https://openalex.org/W2153635508","https://openalex.org/W2160610500","https://openalex.org/W2218318129","https://openalex.org/W2343765202","https://openalex.org/W2979473749","https://openalex.org/W2983923309","https://openalex.org/W3120740533","https://openalex.org/W3195149063","https://openalex.org/W4239510810","https://openalex.org/W4246050513","https://openalex.org/W4256561644","https://openalex.org/W4297795643","https://openalex.org/W6633857196","https://openalex.org/W6635757637","https://openalex.org/W6636873521","https://openalex.org/W6639706480","https://openalex.org/W6676931166","https://openalex.org/W6688386640","https://openalex.org/W6788247690"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Leading":[0],"machine":[1,186],"learning":[2,187],"techniques":[3,49,188],"rely":[4],"on":[5,211,251],"inputs":[6],"in":[7,15,26,37,124,226,234,238,260],"the":[8,16,27,30,53,61,69,118,131,179,220,227,230,266,289,292],"form":[9],"of":[10,21,29,39,68,127,137,143,189,229,257,274,280,291],"pairwise":[11,22,109],"similarities":[12,23,104,113,159],"between":[13],"objects":[14,128,144,281],"data":[17,31,84,119,172,263,302],"set.":[18],"The":[19,111,163,253],"number":[20],"grows":[24],"quadratically":[25],"size":[28,290],"set":[32],"which":[33,100,125],"poses":[34],"a":[35,89,96,121,149,223,235,245],"challenge":[36],"terms":[38],"scalability.":[40],"One":[41],"way":[42],"to":[43,51,153,270,306],"achieve":[44],"practical":[45],"efficiency":[46],"for":[47,82,169,178,184,201,301],"similarity-based":[48],"is":[50,79,165,256,296],"sparsify":[52],"similarity":[54,63,98,140,231],"matrix.":[55,293],"However,":[56],"existing":[57],"sparsification":[58],"approaches":[59],"consider":[60],"complete":[62],"matrix":[64,99],"and":[65,76,78,156,193,198,200,240],"remove":[66],"some":[67],"non-zero":[70],"entries.":[71],"This":[72,294],"requires":[73],"quadratic":[74],"time":[75],"storage":[77],"thus":[80,157,287],"intractable":[81],"large-scale":[83],"sets.":[85,173],"We":[86,174],"introduce":[87],"here":[88,300],"method":[90],"called":[91],"sparse":[92,97,176],"computation":[93,177],"that":[94,129,145,219],"generates":[95],"contains":[101],"only":[102],"relevant":[103,112],"without":[105],"computing":[106],"first":[107],"all":[108],"similarities.":[110],"are":[114,135,151,160,282],"identified":[115],"by":[116,285],"projecting":[117],"onto":[120],"low-dimensional":[122,254],"space":[123],"groups":[126,273,279],"share":[130,148],"same":[132],"grid":[133,267],"neighborhood":[134,150],"deemed":[136],"potential":[138],"high":[139],"whereas":[141],"pairs":[142],"do":[146],"not":[147,161],"considered":[152],"be":[154],"dissimilar":[155],"their":[158],"computed.":[162],"projection":[164,255],"performed":[166],"efficiently":[167],"even":[168],"massively":[170,261],"large":[171,262],"apply":[175],"K-nearest":[180],"neighbors":[181],"algorithm":[182],"(KNN),":[183],"graph-based":[185],"supervised":[190],"normalized":[191,195],"cut":[192,196],"K-supervised":[194],"(SNC":[197],"KSNC)":[199],"support":[202],"vector":[203],"machines":[204],"with":[205],"radial":[206],"basis":[207],"function":[208],"kernels":[209],"(SVM),":[210],"realworld":[212],"classification":[213],"problems.":[214],"Our":[215],"empirical":[216],"results":[217],"show":[218],"approach":[221,295],"achieves":[222],"significant":[224],"reduction":[225,237],"density":[228],"matrix,":[232],"resulting":[233],"substantial":[236],"tuning":[239],"testing":[241],"times,":[242],"while":[243],"having":[244],"minimal":[246],"effect":[247],"(and":[248],"often":[249],"none)":[250],"accuracy.":[252],"further":[258],"use":[259],"sets":[264,303],"where":[265],"structure":[268],"allows":[269],"easily":[271],"identify":[272],"\u201calmost":[275],"identical\u201d":[276],"objects.":[277,309],"Such":[278],"then":[283],"replaced":[284],"representatives,":[286],"reducing":[288],"effective,":[297],"as":[298],"illustrated":[299],"comprising":[304],"up":[305],"8.5":[307],"million":[308]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
