{"id":"https://openalex.org/W1504211824","doi":"https://doi.org/10.1007/978-3-540-24655-8_6","title":"A Query-Dependent Duplicate Detection Approach for Large Scale Search Engines","display_name":"A Query-Dependent Duplicate Detection Approach for Large Scale Search Engines","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W1504211824","doi":"https://doi.org/10.1007/978-3-540-24655-8_6","mag":"1504211824"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-540-24655-8_6","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-540-24655-8_6","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054765777","display_name":"Shaozhi Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shaozhi Ye","raw_affiliation_strings":["Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101505570","display_name":"Ruihua Song","orcid":"https://orcid.org/0000-0001-6036-9035"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruihua Song","raw_affiliation_strings":["Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025631695","display_name":"Ji-Rong Wen","orcid":"https://orcid.org/0000-0002-9777-9676"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ji-Rong Wen","raw_affiliation_strings":["Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103733614","display_name":"Wei\u2010Ying Ma","orcid":"https://orcid.org/0000-0002-7384-0735"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei-Ying Ma","raw_affiliation_strings":["Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, 5F, Sigma Center, No 49 Zhichun Rd, Beijing, China, 100080","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5054765777"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":1.0711,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77319893,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"48","last_page":"58"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8598939180374146},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.7695850133895874},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7065079212188721},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.6985993981361389},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.674162745475769},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6368248462677002},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5778029561042786},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5325784683227539},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.528583824634552},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.4987814426422119},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4374033808708191},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43624719977378845},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2884005308151245},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2723434567451477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8598939180374146},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.7695850133895874},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7065079212188721},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.6985993981361389},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.674162745475769},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6368248462677002},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5778029561042786},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5325784683227539},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.528583824634552},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.4987814426422119},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4374033808708191},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43624719977378845},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2884005308151245},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2723434567451477},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/978-3-540-24655-8_6","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-540-24655-8_6","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.62.8027","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.62.8027","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://wwwcsif.cs.ucdavis.edu/~yeshao/papers/apweb04.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1536700701","https://openalex.org/W1566513354","https://openalex.org/W1941707302","https://openalex.org/W1997438973","https://openalex.org/W2007807439","https://openalex.org/W2007842132","https://openalex.org/W2040075907","https://openalex.org/W2057989089","https://openalex.org/W2067432306","https://openalex.org/W2089544534","https://openalex.org/W2127363101","https://openalex.org/W2130417465","https://openalex.org/W2152565070","https://openalex.org/W2169347997","https://openalex.org/W3141668667","https://openalex.org/W4238430687","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2096359267","https://openalex.org/W2901901036","https://openalex.org/W2026738364","https://openalex.org/W3008917487","https://openalex.org/W2124814993","https://openalex.org/W1521725692","https://openalex.org/W2093300859","https://openalex.org/W2013069866","https://openalex.org/W2113390685","https://openalex.org/W2049540727"],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
