{"id":"https://openalex.org/W2116066543","doi":"https://doi.org/10.1145/1135777.1135882","title":"Optimizing scoring functions and indexes for proximity search in type-annotated corpora","display_name":"Optimizing scoring functions and indexes for proximity search in type-annotated corpora","publication_year":2006,"publication_date":"2006-05-23","ids":{"openalex":"https://openalex.org/W2116066543","doi":"https://doi.org/10.1145/1135777.1135882","mag":"2116066543"},"language":"en","primary_location":{"id":"doi:10.1145/1135777.1135882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135882","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101457009","display_name":"Soumen Chakrabarti","orcid":"https://orcid.org/0000-0002-9419-7486"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Soumen Chakrabarti","raw_affiliation_strings":["IIT Bombay","IIT-Bombay"],"affiliations":[{"raw_affiliation_string":"IIT Bombay","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"IIT-Bombay","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078995359","display_name":"Kriti Puniyani","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kriti Puniyani","raw_affiliation_strings":["IIT Bombay","IIT-Bombay"],"affiliations":[{"raw_affiliation_string":"IIT Bombay","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"IIT-Bombay","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038097905","display_name":"Sujatha Das","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sujatha Das","raw_affiliation_strings":["IIT Bombay","IIT-Bombay"],"affiliations":[{"raw_affiliation_string":"IIT Bombay","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"IIT-Bombay","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101457009"],"corresponding_institution_ids":["https://openalex.org/I162827531"],"apc_list":null,"apc_paid":null,"fwci":6.6788,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.97378445,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"717","last_page":"726"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8468143939971924},{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.6789787411689758},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5939382314682007},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5695805549621582},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.5250527858734131},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4967225193977356},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4902406334877014},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.4779134690761566},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4513019621372223},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4505893290042877},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4494282603263855},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.4278275966644287},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.426597535610199},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16316816210746765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8468143939971924},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.6789787411689758},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5939382314682007},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5695805549621582},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.5250527858734131},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4967225193977356},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4902406334877014},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.4779134690761566},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4513019621372223},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4505893290042877},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4494282603263855},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.4278275966644287},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.426597535610199},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16316816210746765},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1135777.1135882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135882","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1491545690","https://openalex.org/W1574901103","https://openalex.org/W1973828215","https://openalex.org/W1976141932","https://openalex.org/W1977841655","https://openalex.org/W1994924587","https://openalex.org/W2007069074","https://openalex.org/W2009346361","https://openalex.org/W2036962680","https://openalex.org/W2043035982","https://openalex.org/W2047221353","https://openalex.org/W2049021925","https://openalex.org/W2051158076","https://openalex.org/W2051434435","https://openalex.org/W2053767428","https://openalex.org/W2058475745","https://openalex.org/W2074863013","https://openalex.org/W2084243240","https://openalex.org/W2087663869","https://openalex.org/W2102381086","https://openalex.org/W2109190044","https://openalex.org/W2115461474","https://openalex.org/W2125630417","https://openalex.org/W2149820823","https://openalex.org/W2154148563","https://openalex.org/W2157695030","https://openalex.org/W2160484851","https://openalex.org/W2165657574","https://openalex.org/W2169463693","https://openalex.org/W2621280964","https://openalex.org/W3005522790","https://openalex.org/W4241542682"],"related_works":["https://openalex.org/W2900382651","https://openalex.org/W2051058708","https://openalex.org/W1981879262","https://openalex.org/W2363417484","https://openalex.org/W4225863708","https://openalex.org/W1480103567","https://openalex.org/W1599970036","https://openalex.org/W1849827364","https://openalex.org/W2786299737","https://openalex.org/W2161526918"],"abstract_inverted_index":{"We":[0,84],"introduce":[1],"a":[2,14,44,68,89,110,115,177,220],"new,":[3],"powerful":[4],"class":[5],"of":[6,13,50,88,121,195,212,222,232],"text":[7],"proximity":[8],"queries:":[9],"find":[10],"an":[11,138,199],"instance":[12],"given":[15,25,29],"\"answer":[16],"type\"":[17],"(person,":[18],"place,":[19],"distance)":[20],"near":[21],"\"selector\"":[22],"tokens":[23],"matching":[24],"literals":[26],"or":[27],"satisfying":[28],"ground":[30],"predicates.":[31],"An":[32],"example":[33],"query":[34,132,157,217],"is":[35,41,140,229],"type=distance":[36],"NEAR":[37],"Hamburg":[38],"Munich.":[39],"Nearness":[40],"defined":[42],"as":[43],"flexible,":[45],"trainable":[46],"parameterized":[47],"aggregation":[48],"function":[49,117,129],"the":[51,56,62,86,127,131,148,151,161,165,190,193,213,233],"selectors,":[52],"their":[53,59],"frequency":[54],"in":[55,104,150,156],"corpus,":[57],"and":[58,80,97,123,183,192],"distance":[60],"from":[61,118,206],"candidate":[63],"answer.":[64],"Such":[65],"queries":[66,122,187],"provide":[67],"key":[69,100],"data":[70,76],"reduction":[71],"step":[72],"for":[73,94],"information":[74,91],"extraction,":[75],"integration,":[77],"question":[78],"answering,":[79],"other":[81],"text-processing":[82],"applications.":[83],"describe":[85],"architecture":[87],"next-generation":[90],"retrieval":[92],"engine":[93],"such":[95],"applications,":[96],"investigate":[98],"two":[99],"technical":[101],"problems":[102],"faced":[103],"building":[105],"it.":[106],"First,":[107],"we":[108,146,208],"propose":[109],"new":[111,166],"algorithm":[112],"that":[113],"estimates":[114],"scoring":[116,128],"past":[119],"logs":[120,158],"answer":[124,139],"spans.":[125],"Plugging":[126],"into":[130],"processor":[133],"gives":[134],"high":[135],"accuracy:":[136],"typically,":[137],"found":[141],"at":[142],"rank":[143],"2-4.":[144],"Second,":[145],"exploit":[147],"skew":[149],"distribution":[152],"over":[153],"types":[154,205],"seen":[155],"to":[159],"optimize":[160],"space":[162,236],"required":[163,169],"by":[164,170,219],"index":[167,202,227,235],"structures":[168],"our":[171,196],"system.":[172,197],"Extensive":[173],"performance":[174],"studies":[175],"with":[176],"10GB,":[178],"2-million":[179],"document":[180],"TREC":[181,186],"corpus":[182],"several":[184],"hundred":[185],"show":[188],"both":[189],"accuracy":[191],"efficiency":[194],"From":[198],"initial":[200],"4.3GB":[201],"using":[203],"18,000":[204],"WordNet,":[207],"can":[209],"discard":[210],"88%":[211],"space,":[214],"while":[215],"inflating":[216],"times":[218],"factor":[221],"only":[223,230],"1.9.":[224],"Our":[225],"final":[226],"overhead":[228],"20%":[231],"total":[234],"needed.":[237]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
