{"id":"https://openalex.org/W2658182684","doi":"https://doi.org/10.1109/bigdata.2014.7004252","title":"Sparse computation for large-scale data mining","display_name":"Sparse computation for large-scale data mining","publication_year":2014,"publication_date":"2014-10-01","ids":{"openalex":"https://openalex.org/W2658182684","doi":"https://doi.org/10.1109/bigdata.2014.7004252","mag":"2658182684"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2014.7004252","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004252","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085640376","display_name":"Dorit S. Hochbaum","orcid":"https://orcid.org/0000-0002-2498-0512"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dorit S. Hochbaum","raw_affiliation_strings":["University of California, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072687277","display_name":"Philipp Baumann","orcid":"https://orcid.org/0000-0002-3286-4474"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philipp Baumann","raw_affiliation_strings":["University of California, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5085640376"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":1.5778,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.89572403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"354","last_page":"363"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7160390615463257},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.58689284324646},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5824275016784668},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4578515291213989},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1741589903831482},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.059052973985672}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7160390615463257},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.58689284324646},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5824275016784668},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4578515291213989},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1741589903831482},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.059052973985672},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2014.7004252","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004252","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1501972811","https://openalex.org/W1570448133","https://openalex.org/W1581656968","https://openalex.org/W1594924988","https://openalex.org/W1599003247","https://openalex.org/W1625660427","https://openalex.org/W1909224475","https://openalex.org/W1970576574","https://openalex.org/W1975900269","https://openalex.org/W1986096063","https://openalex.org/W2002445283","https://openalex.org/W2008046379","https://openalex.org/W2036216970","https://openalex.org/W2038276547","https://openalex.org/W2058871925","https://openalex.org/W2070355323","https://openalex.org/W2101737005","https://openalex.org/W2120377775","https://openalex.org/W2121947440","https://openalex.org/W2122111042","https://openalex.org/W2122792499","https://openalex.org/W2129018774","https://openalex.org/W2129575457","https://openalex.org/W2132914434","https://openalex.org/W2143582647","https://openalex.org/W2147717514","https://openalex.org/W2147965279","https://openalex.org/W3120740533","https://openalex.org/W6635757637","https://openalex.org/W6650844024"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Several":[0],"leading":[1],"data":[2,31,43,141],"mining":[3,142],"and":[4,118],"clustering":[5,144,168],"algorithms":[6,40,169],"rely":[7],"on":[8,270],"inputs":[9,275],"in":[10,26,94,139,170,182,204,247,255,259],"the":[11,18,27,30,60,65,83,87,112,125,131,152,157,180,183,190,197,229,241,248,251],"form":[12],"of":[13,20,29,54,64,89,99,115,166,219,250],"pairwise":[14,22,116,148],"similarities.":[15,133],"Yet,":[16],"since":[17],"number":[19],"potential":[21],"similarities":[23,62,117],"grows":[24],"quadratically":[25],"size":[28],"set,":[32],"it":[33],"is":[34,92,162,175,225],"computationally":[35],"prohibitive":[36],"to":[37,41,96,178,188,196,210,274],"apply":[38],"such":[39,150],"large":[42],"sets.":[44],"This":[45,160],"paper":[46],"addresses":[47],"this":[48],"challenge":[49],"with":[50,101,164],"a":[51,244,256,264,277],"novel":[52],"method":[53,70,128],"sparse":[55,126,184,220],"computation":[56,127,135,221],"that":[57,75,109,146,165,171,240],"computes":[58],"only":[59,130,177],"relevant":[61,132],"instead":[63],"complete":[66,278],"similarity":[67,185,252,279],"matrix.":[68,280],"The":[69,217],"employs":[71],"an":[72,77],"efficient":[73],"algorithm":[74,145,155],"provides":[76],"\u201capproximate":[78],"Principal":[79],"Component":[80],"Analysis\u201d.":[81],"In":[82],"low-dimensional":[84],"space":[85],"generated,":[86],"concept":[88],"grid":[90,172,199],"neighborhoods":[91,173,212],"applied":[93],"order":[95],"identify":[97,189],"groups":[98],"objects":[100,193],"potentially":[102],"high":[103],"similarity.":[104],"Unlike":[105],"known":[106],"sparsification":[107],"approaches":[108],"generate":[110],"first":[111],"full":[113],"set":[114],"thus":[119],"take":[120],"at":[121],"least":[122],"quadratic":[123],"time,":[124,261],"generates":[129],"Sparse":[134],"can":[136,194],"be":[137],"utilized":[138],"any":[140],"or":[143,156,207],"requires":[147],"similarities,":[149],"as":[151,272],"k-nearest":[153],"neighbors":[154],"spectral":[158],"method.":[159],"approach":[161,242],"contrasted":[163],"grid-based":[167],"proximity":[174],"used":[176],"determine":[179],"entries":[181],"matrix,":[186,253],"not":[187],"clusters.":[191],"Indeed":[192],"belong":[195,209],"same":[198],"neighborhood":[200],"while":[201,262],"ending":[202],"up":[203],"different":[205,211],"clusters,":[206],"conversely,":[208],"yet":[213],"get":[214],"clustered":[215],"jointly.":[216],"applicability":[218],"for":[222,228],"binary":[223],"classification":[224],"demonstrated":[226],"here":[227],"recently":[230],"devised":[231],"supervised":[232],"normalized":[233],"cut":[234],"(SNC).":[235],"Our":[236],"empirical":[237],"results":[238],"show":[239],"achieves":[243],"significant":[245],"reduction":[246,258],"density":[249],"resulting":[254],"substantial":[257],"running":[260],"having":[263],"minimal":[265],"effect":[266],"(and":[267],"often":[268],"none)":[269],"accuracy":[271],"compared":[273],"using":[276]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
