{"id":"https://openalex.org/W2015895266","doi":"https://doi.org/10.1145/1341531.1341553","title":"Personal name classification in web queries","display_name":"Personal name classification in web queries","publication_year":2008,"publication_date":"2008-01-01","ids":{"openalex":"https://openalex.org/W2015895266","doi":"https://doi.org/10.1145/1341531.1341553","mag":"2015895266"},"language":"en","primary_location":{"id":"doi:10.1145/1341531.1341553","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1341531.1341553","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the international conference on Web search and web data mining  - WSDM '08","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105366551","display_name":"Dou Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dou Shen","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068388234","display_name":"Toby Walkery","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Toby Walkery","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003371639","display_name":"Zijian Zhengy","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zijian Zhengy","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056297905","display_name":"Qiang Yangz","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qiang Yangz","raw_affiliation_strings":["Hong Kong University of Science & Technology"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science & Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100414144","display_name":"Ying Li","orcid":"https://orcid.org/0000-0002-1143-1258"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Li","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5105366551"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":4.8122,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.94415174,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"149","last_page":"149"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7983936071395874},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6922383904457092},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6379947662353516},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5257878303527832},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.48891177773475647},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.47741615772247314},{"id":"https://openalex.org/keywords/personally-identifiable-information","display_name":"Personally identifiable information","score":0.47718822956085205},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.47394683957099915},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.458482950925827},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42035534977912903},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.41836410760879517},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3425339460372925},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3395523428916931},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10532602667808533}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7983936071395874},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6922383904457092},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6379947662353516},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5257878303527832},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.48891177773475647},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.47741615772247314},{"id":"https://openalex.org/C169093310","wikidata":"https://www.wikidata.org/wiki/Q3702971","display_name":"Personally identifiable information","level":2,"score":0.47718822956085205},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.47394683957099915},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.458482950925827},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42035534977912903},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.41836410760879517},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3425339460372925},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3395523428916931},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10532602667808533},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1341531.1341553","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1341531.1341553","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the international conference on Web search and web data mining  - WSDM '08","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.141.1548","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.141.1548","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.ust.hk/~qyang/Docs/2008/p149-shen%20wsdm%20pname.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1528601000","https://openalex.org/W1574901103","https://openalex.org/W1972645849","https://openalex.org/W1973867972","https://openalex.org/W1982889956","https://openalex.org/W1984478253","https://openalex.org/W1995945562","https://openalex.org/W2033073041","https://openalex.org/W2056451646","https://openalex.org/W2075635421","https://openalex.org/W2113455164","https://openalex.org/W2114535528","https://openalex.org/W2117813082","https://openalex.org/W2121080163","https://openalex.org/W2124658502","https://openalex.org/W2137217355","https://openalex.org/W2149684865","https://openalex.org/W2151752770","https://openalex.org/W2156577800","https://openalex.org/W2156909104","https://openalex.org/W2162993204","https://openalex.org/W2548695521","https://openalex.org/W2751318774"],"related_works":["https://openalex.org/W3195168932","https://openalex.org/W1996541855","https://openalex.org/W83344948","https://openalex.org/W4383535405","https://openalex.org/W2001121861","https://openalex.org/W3034643069","https://openalex.org/W4221021152","https://openalex.org/W855861036","https://openalex.org/W1529799947","https://openalex.org/W2019264297"],"abstract_inverted_index":{"Personal":[0],"names":[1,26],"are":[2,15,79,106,118],"an":[3,149],"important":[4],"kind":[5],"of":[6,37,63,155,171,202,241],"Web":[7,10,77],"queries":[8,78],"in":[9,17,192],"search,":[11],"and":[12,141,159,163,221,228],"yet":[13],"they":[14],"special":[16],"many":[18],"ways.":[19],"Strategies":[20],"for":[21,34,44,187],"retrieving":[22],"information":[23],"on":[24,73,152,230],"personal":[25,45,58,160,177],"should":[27],"therefore":[28],"be":[29,87,175],"different":[30,185],"from":[31],"the":[32,41,61,104,116,153,169,203,239,253],"strategies":[33],"other":[35],"types":[36],"queries.":[38],"To":[39,143],"improve":[40],"search":[42],"quality":[43],"names,":[46],"a":[47,54,57,136,172,176,194,199,206],"first":[48],"step":[49],"is":[50,56,92,130,196,247],"to":[51,93,111,122,167,174],"detect":[52],"whether":[53],"query":[55,173],"name.":[59,178],"Despite":[60],"importance":[62],"this":[64,74,108,132,165,180],"problem,":[65],"relatively":[66],"little":[67],"previous":[68],"research":[69],"has":[70],"been":[71],"done":[72],"topic.":[75],"Since":[76],"usually":[80],"short,":[81],"conventional":[82],"supervised":[83,222],"machine-learning":[84],"algorithms":[85,215,224],"cannot":[86,134],"applied":[88],"directly.":[89],"An":[90],"alternative":[91],"apply":[94],"some":[95,231],"heuristic":[96],"rules":[97],"coupled":[98],"with":[99,198,213],"name-term":[100,157,190],"dictionaries.":[101],"However,":[102],"when":[103,115],"dictionaries":[105,117,158,191],"small,":[107],"method":[109,133],"tends":[110,121],"make":[112],"false":[113,124],"negatives;":[114],"large,":[119],"it":[120],"generate":[123],"positives.":[125],"A":[126],"more":[127,248,257],"serious":[128],"problem":[129],"that":[131],"provide":[135],"good":[137],"tradeoff":[138],"between":[139],"precision":[140],"recall.":[142],"solve":[144],"these":[145],"problems,":[146],"we":[147,182],"propose":[148],"approach":[150,212],"based":[151],"construction":[154],"probabilistic":[156,189],"name":[161,207],"grammars,":[162],"use":[164],"algorithm":[166],"predict":[168],"probability":[170,200],"In":[179],"paper,":[181],"develop":[183],"four":[184],"methods":[186,220],"building":[188],"which":[193,251],"term":[195,204],"assigned":[197],"value":[201,246],"being":[205],"term.":[208],"We":[209],"compared":[210],"our":[211,242],"baseline":[214,255],"such":[216],"as":[217],"dictionary-based":[218],"look-up":[219],"classification":[223],"including":[225],"logistic":[226],"regression":[227],"SVM":[229],"manually":[232],"labeled":[233],"test":[234],"sets.":[235],"The":[236],"results":[237],"validate":[238],"effectiveness":[240],"approach,":[243],"whose":[244],"F1":[245],"than":[249,258],"79.8%,":[250],"outperforms":[252],"best":[254],"by":[256],"11.3%.":[259]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
