{"id":"https://openalex.org/W2169839551","doi":"https://doi.org/10.1147/jrd.2013.2247232","title":"Novel document detection for massive data streams using distributed dictionary learning","display_name":"Novel document detection for massive data streams using distributed dictionary learning","publication_year":2013,"publication_date":"2013-05-01","ids":{"openalex":"https://openalex.org/W2169839551","doi":"https://doi.org/10.1147/jrd.2013.2247232","mag":"2169839551"},"language":"en","primary_location":{"id":"doi:10.1147/jrd.2013.2247232","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2013.2247232","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036801391","display_name":"Shiva Prasad Kasiviswanathan","orcid":"https://orcid.org/0000-0002-1725-2621"},"institutions":[{"id":"https://openalex.org/I4210134512","display_name":"GE Global Research (United States)","ror":"https://ror.org/03e06qt98","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134512"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"S. P. Kasiviswanathan","raw_affiliation_strings":["GE Global Research Center, Camino Ramon, CA, USA"],"affiliations":[{"raw_affiliation_string":"GE Global Research Center, Camino Ramon, CA, USA","institution_ids":["https://openalex.org/I4210134512"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089801879","display_name":"Guojing Cong","orcid":"https://orcid.org/0000-0003-0850-7714"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"G. Cong","raw_affiliation_strings":["IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112555367","display_name":"Prem Melville","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"P. Melville","raw_affiliation_strings":["IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111487759","display_name":"Richard D. Lawrence","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"R. D. Lawrence","raw_affiliation_strings":["IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research Division, Thomas J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036801391"],"corresponding_institution_ids":["https://openalex.org/I4210134512"],"apc_list":null,"apc_paid":null,"fwci":0.8165,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.78204021,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"57","issue":"3/4","first_page":"9:1","last_page":"9:15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8673892021179199},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6744565963745117},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.6473211050033569},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.5303094387054443},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5077277421951294},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4986543655395508},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.48361554741859436},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4813476502895355},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4812895357608795},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.478375107049942},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4712258577346802},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.46397385001182556},{"id":"https://openalex.org/keywords/neural-coding","display_name":"Neural coding","score":0.4264594614505768},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.41614609956741333},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31669360399246216},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19261124730110168}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8673892021179199},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6744565963745117},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.6473211050033569},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.5303094387054443},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5077277421951294},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4986543655395508},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.48361554741859436},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4813476502895355},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4812895357608795},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.478375107049942},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4712258577346802},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.46397385001182556},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.4264594614505768},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.41614609956741333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31669360399246216},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19261124730110168},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1147/jrd.2013.2247232","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2013.2247232","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W49342364","https://openalex.org/W1520511539","https://openalex.org/W1532325895","https://openalex.org/W1594112393","https://openalex.org/W1599867596","https://openalex.org/W1603765807","https://openalex.org/W1612003148","https://openalex.org/W1880262756","https://openalex.org/W1902027874","https://openalex.org/W1946620893","https://openalex.org/W1963766209","https://openalex.org/W1978259121","https://openalex.org/W1983008863","https://openalex.org/W2005499394","https://openalex.org/W2018165284","https://openalex.org/W2026302857","https://openalex.org/W2050619059","https://openalex.org/W2059503205","https://openalex.org/W2096586829","https://openalex.org/W2105464873","https://openalex.org/W2107533994","https://openalex.org/W2107628405","https://openalex.org/W2112447569","https://openalex.org/W2113606819","https://openalex.org/W2117242598","https://openalex.org/W2129812935","https://openalex.org/W2160547390","https://openalex.org/W2164278908","https://openalex.org/W2613214602","https://openalex.org/W2949483514","https://openalex.org/W2953102514","https://openalex.org/W3029645440"],"related_works":["https://openalex.org/W2381242807","https://openalex.org/W3126131230","https://openalex.org/W2347541121","https://openalex.org/W4288804799","https://openalex.org/W2080951048","https://openalex.org/W3089617106","https://openalex.org/W3032237421","https://openalex.org/W3011883280","https://openalex.org/W2390346111","https://openalex.org/W2369082698"],"abstract_inverted_index":{"Given":[0],"the":[1,19,26,38,41,87,130,152,157,186,193,202],"high":[2],"volume":[3],"of":[4,40,59,80,97,101,118,127,129,140,204],"content":[5],"being":[6],"generated":[7],"online,":[8],"it":[9],"becomes":[10],"necessary":[11],"to":[12,16,22,37,48,74,137,155],"employ":[13],"automated":[14],"techniques":[15],"separate":[17],"out":[18],"documents":[20,60,88,141],"belonging":[21],"novel":[23],"topics":[24],"from":[25,182],"background":[27],"discussion,":[28],"in":[29,55],"a":[30,46,57,76,94,98,104,114,124],"robust":[31],"and":[32,146,149,172,179,196],"scalable":[33],"manner":[34],"(with":[35],"respect":[36],"size":[39],"document":[42,63,106],"set).":[43],"We":[44,132,161],"present":[45],"solution":[47],"this":[49,122,135],"challenge":[50],"based":[51],"on":[52,165,176,185,192],"sparse":[53,115,144],"coding,":[54],"which":[56],"stream":[58],"(where":[61],"each":[62],"is":[64,123],"modeled":[65],"as":[66,113],"an":[67],"m-dimensional":[68],"vector":[69],"y)":[70],"can":[71,89,200],"be":[72,90,108],"used":[73],"learn":[75],"dictionary":[77,147],"matrix":[78],"A":[79],"dimension":[81],"m":[82],"\u00d7":[83],"k,":[84],"such":[85],"that":[86,199],"approximately":[91],"represented":[92,109],"by":[93,142,150],"linear":[95,116],"combination":[96,117],"few":[99],"columns":[100],"A.":[102],"If":[103],"new":[105],"cannot":[107],"with":[110,169],"low":[111],"error":[112],"these":[119],"columns,":[120],"then":[121],"strong":[125],"indicator":[126],"novelty":[128],"document.":[131],"scale":[133],"up":[134],"approach":[136,175],"handle":[138],"millions":[139],"parallelizing":[143],"coding":[145],"learning,":[148],"using":[151],"alternating-directions":[153],"method":[154],"solve":[156],"resulting":[158],"optimization":[159,195],"problems.":[160],"conduct":[162],"our":[163,174,190],"experiments":[164],"high-performance":[166],"computing":[167],"clusters":[168],"differing":[170],"architectures":[171],"evaluate":[173],"news":[177],"streams":[178],"streaming":[180],"data":[181,208],"Twitter\u00ae.":[183],"Based":[184],"analysis,":[187],"we":[188],"share":[189],"insights":[191],"distributed":[194],"machine":[197],"architecture":[198],"help":[201],"design":[203],"exascale":[205],"systems":[206],"supporting":[207],"analytics.":[209]},"counts_by_year":[{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
