{"id":"https://openalex.org/W2169593349","doi":"https://doi.org/10.1145/1739041.1739106","title":"Keyword search for data-centric XML collections with long text fields","display_name":"Keyword search for data-centric XML collections with long text fields","publication_year":2010,"publication_date":"2010-03-16","ids":{"openalex":"https://openalex.org/W2169593349","doi":"https://doi.org/10.1145/1739041.1739106","mag":"2169593349"},"language":"en","primary_location":{"id":"doi:10.1145/1739041.1739106","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1739041.1739106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Extending Database Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008664449","display_name":"Arash Termehchy","orcid":"https://orcid.org/0009-0007-2213-6303"},"institutions":[{"id":"https://openalex.org/I2801919071","display_name":"University of Illinois System","ror":"https://ror.org/05e94g991","country_code":"US","type":"education","lineage":["https://openalex.org/I2801919071"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arash Termehchy","raw_affiliation_strings":["University of Illinois, Urbana, IL"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana, IL","institution_ids":["https://openalex.org/I2801919071"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011314280","display_name":"Marianne Winslett","orcid":"https://orcid.org/0000-0002-3935-7168"},"institutions":[{"id":"https://openalex.org/I2801919071","display_name":"University of Illinois System","ror":"https://ror.org/05e94g991","country_code":"US","type":"education","lineage":["https://openalex.org/I2801919071"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marianne Winslett","raw_affiliation_strings":["University of Illinois, Urbana, IL"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana, IL","institution_ids":["https://openalex.org/I2801919071"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5008664449"],"corresponding_institution_ids":["https://openalex.org/I2801919071"],"apc_list":null,"apc_paid":null,"fwci":2.36,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.89696537,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"537","last_page":"548"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8377865552902222},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.738714873790741},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6765583157539368},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.613786518573761},{"id":"https://openalex.org/keywords/xml-database","display_name":"XML database","score":0.6060791015625},{"id":"https://openalex.org/keywords/efficient-xml-interchange","display_name":"Efficient XML Interchange","score":0.5478498935699463},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.5210061073303223},{"id":"https://openalex.org/keywords/xml-validation","display_name":"XML validation","score":0.5200647711753845},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4878876209259033},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4817933440208435},{"id":"https://openalex.org/keywords/xml-framework","display_name":"XML framework","score":0.4198690950870514},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34249380230903625},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.14792874455451965},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0695713460445404}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8377865552902222},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.738714873790741},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6765583157539368},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.613786518573761},{"id":"https://openalex.org/C183068750","wikidata":"https://www.wikidata.org/wiki/Q357393","display_name":"XML database","level":3,"score":0.6060791015625},{"id":"https://openalex.org/C11508877","wikidata":"https://www.wikidata.org/wiki/Q1124477","display_name":"Efficient XML Interchange","level":3,"score":0.5478498935699463},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.5210061073303223},{"id":"https://openalex.org/C55348073","wikidata":"https://www.wikidata.org/wiki/Q595926","display_name":"XML validation","level":3,"score":0.5200647711753845},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4878876209259033},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4817933440208435},{"id":"https://openalex.org/C21761406","wikidata":"https://www.wikidata.org/wiki/Q8042330","display_name":"XML framework","level":3,"score":0.4198690950870514},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34249380230903625},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.14792874455451965},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0695713460445404},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1739041.1739106","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1739041.1739106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Extending Database Technology","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G1688342222","display_name":null,"funder_award_id":"938071","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1508044906","https://openalex.org/W1532325895","https://openalex.org/W1973828215","https://openalex.org/W1973867972","https://openalex.org/W1993705461","https://openalex.org/W1994190629","https://openalex.org/W1995041279","https://openalex.org/W2029817244","https://openalex.org/W2030766291","https://openalex.org/W2035814200","https://openalex.org/W2095439994","https://openalex.org/W2099015201","https://openalex.org/W2099111195","https://openalex.org/W2102489964","https://openalex.org/W2105819430","https://openalex.org/W2109464129","https://openalex.org/W2111333747","https://openalex.org/W2113112851","https://openalex.org/W2115221525","https://openalex.org/W2116066543","https://openalex.org/W2118302148","https://openalex.org/W2119370347","https://openalex.org/W2126391456","https://openalex.org/W2128034913","https://openalex.org/W2133378824","https://openalex.org/W2148126307","https://openalex.org/W2210278139","https://openalex.org/W2230830246","https://openalex.org/W6674851815","https://openalex.org/W6675928470","https://openalex.org/W6677888985"],"related_works":["https://openalex.org/W1501142348","https://openalex.org/W2378161351","https://openalex.org/W2115478919","https://openalex.org/W2401139123","https://openalex.org/W56234146","https://openalex.org/W1574467322","https://openalex.org/W198049983","https://openalex.org/W2371036544","https://openalex.org/W2053173742","https://openalex.org/W2593656835"],"abstract_inverted_index":{"Users":[0],"who":[1],"are":[2,22,50,97,194],"unfamiliar":[3],"with":[4,149],"database":[5],"query":[6],"languages":[7],"can":[8],"search":[9,79,174],"XML":[10,77,156,172],"data":[11,69,157,178],"sets":[12,158],"using":[13],"keyword":[14,78,173],"queries.":[15],"Current":[16],"approaches":[17,59,189],"for":[18,24,67,89,125,138,176,200],"supporting":[19],"such":[20,68],"queries":[21,152],"either":[23],"text-centric":[25],"XML,":[26,56,91],"where":[27,40],"the":[28,41,123,168,197],"structure":[29,42],"is":[30,43,101,163,203],"very":[31,44],"simple":[32],"and":[33,57,65,86,180,185],"long":[34,47,94,191],"text":[35,48,95,192],"fields":[36,49,96,193],"predominate;":[37],"or":[38],"data-centric,":[39],"rich.":[45],"However,":[46],"becoming":[51],"more":[52],"common":[53],"in":[54],"data-centric":[55,90,177],"existing":[58],"deliver":[60],"relatively":[61],"poor":[62],"precision,":[63,84,183],"recall,":[64,85,184],"ranking":[66,87,162,186],"sets.":[70],"In":[71,116],"this":[72,132],"paper,":[73],"we":[74,121,206],"introduce":[75],"an":[76],"method":[80,175],"that":[81,160],"provides":[82,181],"high":[83],"quality":[88],"even":[92],"when":[93,190],"present.":[98,195],"Our":[99,147],"approach":[100,199],"based":[102,142],"on":[103,143],"a":[104,117,126],"new":[105],"group":[106],"of":[107],"structural":[108],"relationships":[109],"called":[110],"normalized":[111],"term":[112],"presence":[113],"correlation":[114],"(NTPC).":[115],"one-time":[118],"setup":[119],"phase,":[120],"compute":[122,211],"NTPCs":[124,202,212],"representative":[127],"DB":[128],"instance,":[129],"then":[130],"use":[131],"information":[133],"to":[134,210],"rank":[135],"candidate":[136],"answers":[137],"all":[139],"subsequent":[140],"queries,":[141],"each":[144],"answer's":[145],"structure.":[146],"experiments":[148],"65":[150],"user-supplied":[151],"over":[153],"two":[154],"real-world":[155],"show":[159],"NTPC-based":[161],"always":[164],"as":[165,167],"effective":[166],"best":[169],"previously":[170],"available":[171],"sets,":[179],"better":[182],"than":[187],"previous":[188],"As":[196],"straightforward":[198],"computing":[201],"too":[204],"slow,":[205],"also":[207],"present":[208],"algorithms":[209],"efficiently.":[213]},"counts_by_year":[{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
