{"id":"https://openalex.org/W1996186909","doi":"https://doi.org/10.1145/1061318.1061326","title":"XML stream processing using tree-edit distance embeddings","display_name":"XML stream processing using tree-edit distance embeddings","publication_year":2005,"publication_date":"2005-03-01","ids":{"openalex":"https://openalex.org/W1996186909","doi":"https://doi.org/10.1145/1061318.1061326","mag":"1996186909"},"language":"en","primary_location":{"id":"doi:10.1145/1061318.1061326","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1061318.1061326","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023079617","display_name":"Minos Garofalakis","orcid":"https://orcid.org/0000-0003-0285-3907"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Minos Garofalakis","raw_affiliation_strings":["Bell Labs, Lucent Technologies, Murray Hill, NJ","[Bell Laboratories, Lucent Technologies, Murray Hill, NJ]"],"affiliations":[{"raw_affiliation_string":"Bell Labs, Lucent Technologies, Murray Hill, NJ","institution_ids":[]},{"raw_affiliation_string":"[Bell Laboratories, Lucent Technologies, Murray Hill, NJ]","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029407896","display_name":"Amit Kumar","orcid":"https://orcid.org/0000-0002-3965-6627"},"institutions":[{"id":"https://openalex.org/I64295750","display_name":"Indian Institute of Technology Indore","ror":"https://ror.org/01hhf7w52","country_code":"IN","type":"education","lineage":["https://openalex.org/I64295750"]},{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amit Kumar","raw_affiliation_strings":["Indian Institute of Technology, New Delhi, India","Indian Institute of Technology, New Delhi,India#TAB#"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, New Delhi, India","institution_ids":["https://openalex.org/I68891433"]},{"raw_affiliation_string":"Indian Institute of Technology, New Delhi,India#TAB#","institution_ids":["https://openalex.org/I64295750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5023079617"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.5812,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.96470743,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"30","issue":"1","first_page":"279","last_page":"332"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.8535956144332886},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8106791973114014},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5559132099151611},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.5409411787986755},{"id":"https://openalex.org/keywords/metric-space","display_name":"Metric space","score":0.5334123969078064},{"id":"https://openalex.org/keywords/k-ary-tree","display_name":"K-ary tree","score":0.49494093656539917},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4855186939239502},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.44620436429977417},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4424106478691101},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.4215899705886841},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4161669611930847},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4000810384750366},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3899618685245514},{"id":"https://openalex.org/keywords/tree-structure","display_name":"Tree structure","score":0.30826669931411743},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15262210369110107},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12579560279846191},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.11894172430038452},{"id":"https://openalex.org/keywords/binary-tree","display_name":"Binary tree","score":0.09170442819595337},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.08645060658454895}],"concepts":[{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.8535956144332886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8106791973114014},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5559132099151611},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.5409411787986755},{"id":"https://openalex.org/C198043062","wikidata":"https://www.wikidata.org/wiki/Q180953","display_name":"Metric space","level":2,"score":0.5334123969078064},{"id":"https://openalex.org/C100560664","wikidata":"https://www.wikidata.org/wiki/Q3608019","display_name":"K-ary tree","level":4,"score":0.49494093656539917},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4855186939239502},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.44620436429977417},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4424106478691101},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.4215899705886841},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4161669611930847},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4000810384750366},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3899618685245514},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.30826669931411743},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15262210369110107},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12579560279846191},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.11894172430038452},{"id":"https://openalex.org/C197855036","wikidata":"https://www.wikidata.org/wiki/Q380172","display_name":"Binary tree","level":2,"score":0.09170442819595337},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.08645060658454895},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1061318.1061326","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1061318.1061326","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W9273873","https://openalex.org/W1214703565","https://openalex.org/W1484819834","https://openalex.org/W1493197938","https://openalex.org/W1493892051","https://openalex.org/W1495913768","https://openalex.org/W1506882104","https://openalex.org/W1524520778","https://openalex.org/W1530239281","https://openalex.org/W1536741282","https://openalex.org/W1540310433","https://openalex.org/W1551385575","https://openalex.org/W1586825695","https://openalex.org/W1749554492","https://openalex.org/W1766932551","https://openalex.org/W1784244685","https://openalex.org/W1785933978","https://openalex.org/W1972418517","https://openalex.org/W1975887898","https://openalex.org/W1976373002","https://openalex.org/W1998244781","https://openalex.org/W1998783890","https://openalex.org/W2001474264","https://openalex.org/W2013092187","https://openalex.org/W2015180052","https://openalex.org/W2020584928","https://openalex.org/W2043481183","https://openalex.org/W2047424291","https://openalex.org/W2053075747","https://openalex.org/W2061135126","https://openalex.org/W2064379477","https://openalex.org/W2069980026","https://openalex.org/W2092057784","https://openalex.org/W2106163100","https://openalex.org/W2107683037","https://openalex.org/W2110557355","https://openalex.org/W2112452856","https://openalex.org/W2117242430","https://openalex.org/W2122929038","https://openalex.org/W2126105096","https://openalex.org/W2129097092","https://openalex.org/W2132837277","https://openalex.org/W2135611729","https://openalex.org/W2139433757","https://openalex.org/W2146801187","https://openalex.org/W2148706674","https://openalex.org/W2156664231","https://openalex.org/W2156943642","https://openalex.org/W2161936973","https://openalex.org/W2164501930","https://openalex.org/W2230830246","https://openalex.org/W2295428206","https://openalex.org/W2582397176","https://openalex.org/W2752853835","https://openalex.org/W2915063781","https://openalex.org/W2979473749","https://openalex.org/W2989713547","https://openalex.org/W3212699936","https://openalex.org/W4214936872","https://openalex.org/W4231916799","https://openalex.org/W4234667859","https://openalex.org/W4241185933","https://openalex.org/W4245266864","https://openalex.org/W4285719527","https://openalex.org/W4372267129"],"related_works":["https://openalex.org/W4386026606","https://openalex.org/W2897158376","https://openalex.org/W1985051577","https://openalex.org/W2126396628","https://openalex.org/W2092057784","https://openalex.org/W2028509346","https://openalex.org/W1996186909","https://openalex.org/W1642324455","https://openalex.org/W124283286","https://openalex.org/W3094110505"],"abstract_inverted_index":{"We":[0,82],"propose":[1],"the":[2,7,70,120,131,161,183,190,211],"first":[3,191],"known":[4,94],"solution":[5,38],"to":[6,98],"problem":[8],"of":[9,16,36,61,104,133,164,185],"correlating,":[10],"in":[11,91,123,210],"small":[12],"space,":[13],"continuous":[14,138],"streams":[15],"XML":[17,108,139,153,208],"data":[18,75,109,154,209],"through":[19,205],"approximate":[20,124,130],"(structure":[21],"and":[22,128,151,201],"content)":[23],"matching,":[24],"as":[25,115],"defined":[26],"by":[27],"a":[28,40,57,101,105,116],"general":[29],"tree-edit":[30,46,125,198],"distance":[31,47,71,126,199],"metric.":[32],"The":[33],"key":[34],"element":[35],"our":[37,85,157,165,177,186],"is":[39,168],"novel":[41],"algorithm":[42,87],"for":[43,119,197],"obliviously":[44],"embedding":[45,86,166],"metrics":[48],"into":[49],"an":[50,145],"L":[51],"1":[52],"vector":[53],"space":[54],"while":[55],"guaranteeing":[56],"(worst-case)":[58],"upper":[59],"bound":[60],"O":[62],"(log":[63],"2":[64],"n":[65,67,80],"log*":[66],")":[68],"on":[69,194,202],"distortion":[72,180],"between":[73],"any":[74],"trees":[76,155],"with":[77,93,148],"at":[78],"most":[79],"nodes.":[81],"demonstrate":[83],"how":[84],"can":[88,112],"be":[89,113,174],"applied":[90],"conjunction":[92],"random":[95],"sketching":[96],"techniques":[97,167],"(1)":[99],"build":[100],"compact":[102],"synopsis":[103],"massive,":[106],"streaming":[107,212],"tree":[110,122],"that":[111,160],"used":[114],"concise":[117],"surrogate":[118],"full":[121],"computations;":[127],"(2)":[129],"result":[132],"tree-edit-distance":[134],"similarity":[135,206],"joins":[136],"over":[137],"document":[140],"streams.":[141],"Experimental":[142],"results":[143,193],"from":[144,176],"empirical":[146],"study":[147],"both":[149],"synthetic":[150],"real-life":[152],"validate":[156],"approach,":[158],"demonstrating":[159],"average-case":[162],"behavior":[163],"much":[169],"better":[170],"than":[171],"what":[172],"would":[173],"predicted":[175],"theoretical":[178],"worst-case":[179],"bounds.":[181],"To":[182],"best":[184],"knowledge,":[187],"these":[188],"are":[189],"algorithmic":[192],"low-distortion":[195],"embeddings":[196],"metrics,":[200],"correlating":[203],"(e.g.,":[204],"joins)":[207],"model.":[213]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
