{"id":"https://openalex.org/W2951596818","doi":"https://doi.org/10.1145/3292500.3330923","title":"Efficient Global String Kernel with Random Features","display_name":"Efficient Global String Kernel with Random Features","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W2951596818","doi":"https://doi.org/10.1145/3292500.3330923","mag":"2951596818"},"language":"en","primary_location":{"id":"doi:10.1145/3292500.3330923","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101478122","display_name":"Lingfei Wu","orcid":"https://orcid.org/0009-0008-8081-6275"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lingfei Wu","raw_affiliation_strings":["IBM Research, Elmsford, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Elmsford, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073803011","display_name":"Ian En-Hsu Yen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ian En-Hsu Yen","raw_affiliation_strings":["CMU, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"CMU, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063213638","display_name":"Siyu Huo","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siyu Huo","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048756500","display_name":"Liang Zhao","orcid":"https://orcid.org/0000-0002-2648-9989"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liang Zhao","raw_affiliation_strings":["George Mason University, Fairfax, VA, USA"],"affiliations":[{"raw_affiliation_string":"George Mason University, Fairfax, VA, USA","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043893150","display_name":"Kun Xu","orcid":"https://orcid.org/0000-0002-1663-9998"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kun Xu","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100733564","display_name":"Liang Ma","orcid":"https://orcid.org/0000-0002-3048-5112"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liang Ma","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058611515","display_name":"Shouling Ji","orcid":"https://orcid.org/0000-0003-4268-372X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouling Ji","raw_affiliation_strings":["Zhejiang University, Hangzhou, UNK, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, UNK, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028089542","display_name":"Char\u0173 C. Aggarwal","orcid":"https://orcid.org/0000-0003-2579-7581"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charu Aggarwal","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101478122"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":0.1659,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.42776482,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"520","last_page":"528"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6720643639564514},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6621966361999512},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.5109614133834839},{"id":"https://openalex.org/keywords/string-kernel","display_name":"String kernel","score":0.43385568261146545},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4133448600769043},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35318633913993835},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3071150779724121},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2838449478149414},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.22374242544174194},{"id":"https://openalex.org/keywords/kernel-method","display_name":"Kernel method","score":0.21642544865608215},{"id":"https://openalex.org/keywords/variable-kernel-density-estimation","display_name":"Variable kernel density estimation","score":0.16729983687400818},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.08973810076713562}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6720643639564514},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6621966361999512},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.5109614133834839},{"id":"https://openalex.org/C55851704","wikidata":"https://www.wikidata.org/wiki/Q7623983","display_name":"String kernel","level":5,"score":0.43385568261146545},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4133448600769043},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35318633913993835},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3071150779724121},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2838449478149414},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.22374242544174194},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.21642544865608215},{"id":"https://openalex.org/C195699287","wikidata":"https://www.wikidata.org/wiki/Q7915722","display_name":"Variable kernel density estimation","level":4,"score":0.16729983687400818},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.08973810076713562},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3292500.3330923","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1480643256","https://openalex.org/W1542652324","https://openalex.org/W1563088657","https://openalex.org/W1576213419","https://openalex.org/W1647671624","https://openalex.org/W1689711448","https://openalex.org/W1924770834","https://openalex.org/W1968326035","https://openalex.org/W1990470142","https://openalex.org/W1995918845","https://openalex.org/W1997102766","https://openalex.org/W2020816856","https://openalex.org/W2054698853","https://openalex.org/W2074231493","https://openalex.org/W2095705004","https://openalex.org/W2097827365","https://openalex.org/W2107725114","https://openalex.org/W2108837639","https://openalex.org/W2112545207","https://openalex.org/W2115933183","https://openalex.org/W2118585731","https://openalex.org/W2119290215","https://openalex.org/W2121950477","https://openalex.org/W2124225821","https://openalex.org/W2125970547","https://openalex.org/W2129250947","https://openalex.org/W2144902422","https://openalex.org/W2153635508","https://openalex.org/W2156279557","https://openalex.org/W2161195767","https://openalex.org/W2170960297","https://openalex.org/W2172140247","https://openalex.org/W2356696605","https://openalex.org/W2393444374","https://openalex.org/W2401290885","https://openalex.org/W2554813760","https://openalex.org/W2606823780","https://openalex.org/W2666296020","https://openalex.org/W2803475843","https://openalex.org/W2963013450","https://openalex.org/W2963722442","https://openalex.org/W3007254004","https://openalex.org/W4214671568","https://openalex.org/W4285719527","https://openalex.org/W6677656871","https://openalex.org/W6683794572"],"related_works":["https://openalex.org/W152351265","https://openalex.org/W3042295250","https://openalex.org/W2142835450","https://openalex.org/W2068282877","https://openalex.org/W1558063662","https://openalex.org/W1508740138","https://openalex.org/W2052014780","https://openalex.org/W2278188209","https://openalex.org/W2140241999","https://openalex.org/W4226175139"],"abstract_inverted_index":{"Analysis":[0],"of":[1,8,32,60,91,94,102,105,122,143,167,174,190,200,259,274,280],"large-scale":[2],"sequential":[3],"data":[4],"has":[5],"been":[6,80],"one":[7],"the":[9,36,61,73,83,89,103,135,144,165,168,172,181,257,272,275,278],"most":[10,93],"crucial":[11],"tasks":[12],"in":[13,35,100,110,134,230,250],"areas":[14],"such":[15,50],"as":[16,51,217],"bioinformatics,":[17],"text,":[18],"and":[19,153,213,277],"audio":[20],"mining.":[21],"Existing":[22],"string":[23,124,169,176],"kernels,":[24],"however,":[25],"either":[26],"(i)":[27,129],"rely":[28,66],"on":[29,67,238],"local":[30],"features":[31],"short":[33],"substructures":[34],"string,":[37,92],"which":[38,55],"hardly":[39],"capture":[40],"long":[41],"discriminative":[42],"patterns,":[43],"(ii)":[44,140],"sum":[45],"over":[46],"too":[47],"many":[48],"substructures,":[49],"all":[52],"possible":[53],"subsequences,":[54],"leads":[56],"to":[57,88,128,162,197,252],"diagonal":[58,149],"dominance":[59],"kernel":[62,151],"matrix,":[63,152],"or":[64,247],"(iii)":[65,154],"non-positive-definite":[68],"similarity":[69],"measures":[70],"derived":[71],"from":[72],"edit":[74],"distance.":[75],"Furthermore,":[76],"while":[77],"there":[78],"have":[79,155],"works":[81],"addressing":[82],"computational":[84,215],"challenge":[85],"with":[86,160,256,271],"respect":[87,161],"length":[90,166,279],"them":[95],"still":[96],"experience":[97],"quadratic":[98],"complexity":[99],"terms":[101],"number":[104,173,276],"training":[106,157,175],"samples":[107],"when":[108],"used":[109,229],"a":[111,119,148,156,188,198],"kernel-based":[112],"classifier.":[113],"In":[114,262],"this":[115,179,208],"paper,":[116],"we":[117,264],"present":[118],"new":[120],"class":[121],"global":[123,131,138],"kernels":[125,183,206],"that":[126,205,225,243,267],"aims":[127],"discover":[130],"properties":[132],"hidden":[133],"strings":[136,258],"through":[137,187],"alignments,":[139],"maintain":[141],"positive-definiteness":[142],"kernel,":[145],"without":[146],"introducing":[147],"dominant":[150],"cost":[158],"linear":[159,232],"not":[163],"only":[164],"but":[170],"also":[171],"samples.":[177],"To":[178],"end,":[180],"proposed":[182],"are":[184,210],"explicitly":[185],"defined":[186,207],"series":[189],"different":[191],"random":[192,201],"feature":[193],"maps,":[194],"each":[195],"corresponding":[196],"distribution":[199],"strings.":[202],"We":[203],"show":[204,266],"way":[209],"always":[211,219],"positive-definite,":[212],"exhibit":[214],"benefits":[216],"they":[218],"produce":[220],"Random":[221],"String":[222],"Embeddings":[223],"(RSE)":[224],"can":[226],"be":[227],"directly":[228],"any":[231],"classification":[233],"models.":[234],"Our":[235],"extensive":[236],"experiments":[237],"nine":[239],"benchmark":[240],"datasets":[241],"corroborate":[242],"RSE":[244,268],"achieves":[245],"better":[246],"comparable":[248],"accuracy":[249],"comparison":[251],"state-of-the-art":[253],"baselines,":[254],"especially":[255],"longer":[260],"lengths.":[261],"addition,":[263],"empirically":[265],"scales":[269],"linearly":[270],"increase":[273],"string.":[281]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
