{"id":"https://openalex.org/W2158551114","doi":"https://doi.org/10.1145/1963405.1963457","title":"Web scale NLP","display_name":"Web scale NLP","publication_year":2011,"publication_date":"2011-03-28","ids":{"openalex":"https://openalex.org/W2158551114","doi":"https://doi.org/10.1145/1963405.1963457","mag":"2158551114"},"language":"en","primary_location":{"id":"doi:10.1145/1963405.1963457","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1963405.1963457","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th international conference on World wide web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041659067","display_name":"Kuansan Wang","orcid":"https://orcid.org/0000-0001-7089-7966"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kuansan Wang","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082795450","display_name":"Christopher Thrasher","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Thrasher","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081017604","display_name":"Bo-June Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo-June Paul Hsu","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041659067"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":8.1239,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.97496728,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"357","last_page":"366"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854529619216919},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6116502285003662},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5760951042175293},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48603135347366333},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.45921608805656433},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4180624485015869},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.20355677604675293}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854529619216919},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6116502285003662},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5760951042175293},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48603135347366333},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.45921608805656433},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4180624485015869},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.20355677604675293},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1963405.1963457","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1963405.1963457","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th international conference on World wide web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.6600000262260437,"display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W294504433","https://openalex.org/W1576685422","https://openalex.org/W1595375842","https://openalex.org/W1985414497","https://openalex.org/W2003471189","https://openalex.org/W2006317942","https://openalex.org/W2016856586","https://openalex.org/W2032558547","https://openalex.org/W2068017609","https://openalex.org/W2069870183","https://openalex.org/W2069997576","https://openalex.org/W2074546930","https://openalex.org/W2079349523","https://openalex.org/W2095432453","https://openalex.org/W2103407879","https://openalex.org/W2105008421","https://openalex.org/W2122228338","https://openalex.org/W2131408881","https://openalex.org/W2134913565","https://openalex.org/W2135322081","https://openalex.org/W2145833060","https://openalex.org/W2156985047","https://openalex.org/W2160885631","https://openalex.org/W2161952424","https://openalex.org/W2952343510","https://openalex.org/W3021371423","https://openalex.org/W6813823174"],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W3188962172","https://openalex.org/W4389443772","https://openalex.org/W2772917594","https://openalex.org/W4306742369","https://openalex.org/W2548721895","https://openalex.org/W4303457083","https://openalex.org/W2373456246","https://openalex.org/W3204019825","https://openalex.org/W3098003361"],"abstract_inverted_index":{"This":[0],"paper":[1],"uses":[2],"the":[3,37,41,53,63,67,101,112,126,133,144,161,174,177,183,190,195,199,209,213,253,260,266,273,279,302,309],"URL":[4,200],"word":[5,103,156,191,201,280],"breaking":[6,104,192,202,281],"task":[7,203],"as":[8,16,139,225],"an":[9,316],"example":[10],"to":[11,34,46,59,143,181,205,252,278,298],"elaborate":[12],"what":[13],"we":[14],"identify":[15],"crucial":[17],"in":[18,52,189,248,294],"designing":[19],"statistical":[20],"natural":[21],"language":[22,49,178,222],"processing":[23,88],"(NLP)":[24],"algorithms":[25],"for":[26,73,87,198,265,283],"Web":[27,54,121],"scale":[28,122,171],"applications:":[29],"(1)":[30],"rudimentary":[31],"multilingual":[32],"capabilities":[33],"cope":[35],"with":[36,62,305],"global":[38],"nature":[39],"of":[40,66,80,91,169,208,237,255,319],"Web,":[42,68],"(2)":[43],"multi-style":[44],"modeling":[45,245],"handle":[47],"diverse":[48],"styles":[50,241],"seen":[51],"contents,":[55],"(3)":[56],"fast":[57],"adaptation":[58],"keep":[60],"pace":[61],"dynamic":[64],"changes":[65],"(4)":[69],"minimal":[70,84,306],"heuristic":[71],"assumptions":[72,142],"generalizability":[74],"and":[75,77,83,109,148,163,194,231],"robustness,":[76],"(5)":[78],"possibilities":[79],"efficient":[81,153],"implementations":[82],"manual":[85],"efforts":[86],"massive":[89],"amount":[90],"data":[92],"at":[93],"a":[94,120,150,167,186,256],"reasonable":[95],"cost.":[96],"We":[97,130],"first":[98,127],"show":[99],"that":[100,207],"state-of-the-art":[102],"techniques":[105,135],"can":[106,124,136,321],"be":[107,137,206,323],"unified":[108],"generalized":[110],"under":[111],"Bayesian":[113],"minimum":[114],"risk":[115],"(BMR)":[116],"framework":[117,162],"that,":[118],"using":[119,325],"N-gram,":[123,259],"meet":[125],"three":[128],"requirements.":[129],"discuss":[131],"how":[132],"existing":[134],"viewed":[138],"introducing":[140],"additional":[141],"basic":[145],"BMR":[146],"framework,":[147],"describe":[149],"generic":[151],"yet":[152],"implementation":[154,165],"called":[155],"synchronous":[157],"beam":[158],"search.":[159],"Testing":[160],"its":[164],"on":[166,313],"series":[168],"large":[170],"experiments":[172],"reveals":[173],"following.":[175],"First,":[176],"style":[179],"used":[180],"build":[182],"model":[184,304,311],"plays":[185],"critical":[187],"role":[188],"task,":[193],"most":[196,263],"suitable":[197],"appears":[204],"document":[210,227,314],"title":[211,267],"where":[212],"best":[214],"performance":[215,282,300],"is":[216,262],"obtained.":[217],"Models":[218],"created":[219],"from":[220,226,243],"other":[221],"styles,":[223],"such":[224],"body,":[228],"anchor":[229],"text,":[230],"even":[232],"queries,":[233],"exhibit":[234],"varying":[235],"degrees":[236],"mismatch.":[238],"Although":[239],"all":[240],"benefit":[242],"increasing":[244],"power":[246],"which,":[247],"our":[249],"experiments,":[250],"corresponds":[251],"use":[254],"higher":[257],"order":[258],"gain":[261],"recognizable":[264],"model.":[268],"The":[269],"heuristics":[270],"proposed":[271],"by":[272],"prior":[274],"arts":[275],"do":[276],"contribute":[277],"mismatched":[284],"or":[285],"less":[286,291],"powerful":[287],"models,":[288],"but":[289],"are":[290],"effective":[292],"and,":[293],"many":[295],"cases,":[296],"lead":[297],"poorer":[299],"than":[301],"matched":[303,310],"assumptions.":[307],"For":[308],"based":[312],"titles,":[315],"accuracy":[317],"rate":[318],"97.18%":[320],"already":[322],"achieved":[324],"simple":[326],"trigram":[327],"without":[328],"any":[329],"heuristics.":[330]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
