{"id":"https://openalex.org/W3034334126","doi":"https://doi.org/10.1145/3340531.3412779","title":"ORCAS: 18 Million Clicked Query-Document Pairs for Analyzing Search","display_name":"ORCAS: 18 Million Clicked Query-Document Pairs for Analyzing Search","publication_year":2020,"publication_date":"2020-10-19","ids":{"openalex":"https://openalex.org/W3034334126","doi":"https://doi.org/10.1145/3340531.3412779","mag":"3034334126"},"language":"en","primary_location":{"id":"doi:10.1145/3340531.3412779","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340531.3412779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2006.05324","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055132321","display_name":"Nick Craswell","orcid":"https://orcid.org/0000-0002-9351-8137"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nick Craswell","raw_affiliation_strings":["Microsoft, Seattle, WA, USA","Microsoft (United States), Redmond, United States"],"affiliations":[{"raw_affiliation_string":"Microsoft, Seattle, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft (United States), Redmond, United States","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103325247","display_name":"Daniel Campos","orcid":"https://orcid.org/0000-0002-5138-8426"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Campos","raw_affiliation_strings":["Microsoft &amp; University of Washington, Seattle, WA, USA","Microsoft (United States), Redmond, United States"],"affiliations":[{"raw_affiliation_string":"Microsoft &amp; University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I201448701"]},{"raw_affiliation_string":"Microsoft (United States), Redmond, United States","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048533217","display_name":"Bhaskar Mitra","orcid":"https://orcid.org/0000-0002-5270-5550"},"institutions":[{"id":"https://openalex.org/I4210153468","display_name":"Microsoft (Canada)","ror":"https://ror.org/04xhxg104","country_code":"CA","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210153468"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["CA","GB"],"is_corresponding":false,"raw_author_name":"Bhaskar Mitra","raw_affiliation_strings":["Microsoft &amp; University College London, Montreal, PQ, Canada","University College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Microsoft &amp; University College London, Montreal, PQ, Canada","institution_ids":["https://openalex.org/I4210153468","https://openalex.org/I45129253"]},{"raw_affiliation_string":"University College London, London, United Kingdom","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076265623","display_name":"Emine Y\u0131lmaz","orcid":"https://orcid.org/0000-0002-3434-8932"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emine Yilmaz","raw_affiliation_strings":["University College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University College London, London, United Kingdom","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090703924","display_name":"Bodo Billerbeck","orcid":"https://orcid.org/0000-0002-9311-8504"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bodo Billerbeck","raw_affiliation_strings":["Microsoft, Melbourne, VIC, Australia","Microsoft (United States), Redmond, United States"],"affiliations":[{"raw_affiliation_string":"Microsoft, Melbourne, VIC, Australia","institution_ids":[]},{"raw_affiliation_string":"Microsoft (United States), Redmond, United States","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055132321"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":0.56614728,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73867807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2983","last_page":"2989"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8397082686424255},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7795913219451904},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7698724269866943},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.4836641252040863},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4427235722541809},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3842291533946991}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8397082686424255},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7795913219451904},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7698724269866943},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.4836641252040863},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4427235722541809},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3842291533946991},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3340531.3412779","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340531.3412779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2006.05324","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2006.05324","pdf_url":"https://arxiv.org/pdf/2006.05324","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"mag:3034334126","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2006.05324","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2006.05324","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2006.05324","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3196071292","is_oa":false,"landing_page_url":"https://www.microsoft.com/en-us/research/publication/orcas-18-million-clicked-query-document-pairs-for-analyzing-search/","pdf_url":null,"source":{"id":"https://openalex.org/S4306418063","display_name":"Conference on Information and Knowledge Management","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Conference on Information and Knowledge Management","raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2006.05324","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2006.05324","pdf_url":"https://arxiv.org/pdf/2006.05324","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3034334126.pdf","grobid_xml":"https://content.openalex.org/works/W3034334126.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1972645849","https://openalex.org/W1973435495","https://openalex.org/W1973867972","https://openalex.org/W1982858363","https://openalex.org/W2062918108","https://openalex.org/W2086378526","https://openalex.org/W2093245971","https://openalex.org/W2093646604","https://openalex.org/W2129235726","https://openalex.org/W2135500808","https://openalex.org/W2136189984","https://openalex.org/W2143196462","https://openalex.org/W2143331230","https://openalex.org/W2153190022","https://openalex.org/W2160555926","https://openalex.org/W2539671052","https://openalex.org/W2951534261","https://openalex.org/W2955375559","https://openalex.org/W2991671759","https://openalex.org/W3044284384","https://openalex.org/W3044812140","https://openalex.org/W3178067142","https://openalex.org/W4300175355","https://openalex.org/W6643393412","https://openalex.org/W6712779561"],"related_works":["https://openalex.org/W3196071292","https://openalex.org/W2294145134","https://openalex.org/W2110650300","https://openalex.org/W2540707634","https://openalex.org/W2917247417","https://openalex.org/W966218981","https://openalex.org/W2407544020","https://openalex.org/W2798771323","https://openalex.org/W2407032911","https://openalex.org/W2036362156","https://openalex.org/W2388783363","https://openalex.org/W1490642710","https://openalex.org/W3115089445","https://openalex.org/W3140308550","https://openalex.org/W2046375057","https://openalex.org/W2118206599","https://openalex.org/W94637519","https://openalex.org/W1608091204","https://openalex.org/W2025422739","https://openalex.org/W2159981039"],"abstract_inverted_index":{"Users":[0],"of":[1,39,73,89,98,150],"Web":[2],"search":[3],"engines":[4],"reveal":[5],"their":[6],"information":[7,20],"needs":[8],"through":[9],"queries":[10,91],"and":[11,63,92,111,159],"clicks,":[12],"making":[13],"click":[14,23,49,120],"logs":[15,24],"a":[16,48,66,148],"useful":[17],"asset":[18],"for":[19,30],"retrieval.":[21],"However,":[22],"have":[25,78],"not":[26],"been":[27],"publicly":[28],"released":[29],"academic":[31],"use,":[32],"because":[33],"they":[34],"can":[35],"be":[36],"too":[37],"revealing":[38],"personally":[40],"or":[41],"commercially":[42],"sensitive":[43],"information.":[44],"This":[45],"paper":[46],"describes":[47],"data":[50,121],"release":[51],"related":[52],"to":[53,82,94,101,122,139],"the":[54,74,119,124,144,151],"TREC":[55,75,95,125],"Deep":[56],"Learning":[57],"Track":[58],"document":[59],"corpus.":[60,145],"After":[61],"aggregation":[62],"filtering,":[64],"including":[65],"k-anonymity":[67],"requirement,":[68],"we":[69],"find":[70],"1.4":[71],"million":[72,80,84],"DL":[76,126],"URLs":[77,142],"18":[79],"connections":[81,93,138],"10":[83],"distinct":[85],"queries.":[86],"Our":[87],"dataset":[88],"these":[90],"documents":[96],"is":[97],"similar":[99],"size":[100],"proprietary":[102],"datasets":[103],"used":[104],"in":[105,143,157],"previous":[106],"papers":[107],"on":[108],"query":[109],"mining":[110],"ranking.":[112],"We":[113,146],"perform":[114],"some":[115],"preliminary":[116],"experiments":[117],"using":[118],"augment":[123],"training":[127],"data,":[128],"offering":[129],"by":[130],"comparison:":[131],"28x":[132],"more":[133,137,141],"queries,":[134],"with":[135],"49x":[136],"4.4x":[140],"present":[147],"description":[149],"dataset's":[152],"generation":[153],"process,":[154],"characteristics,":[155],"use":[156],"ranking":[158],"suggest":[160],"other":[161],"potential":[162],"uses.":[163]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
