{"id":"https://openalex.org/W2013109830","doi":"https://doi.org/10.1145/1031171.1031194","title":"Acquisition of categorized named entities for web search","display_name":"Acquisition of categorized named entities for web search","publication_year":2004,"publication_date":"2004-11-13","ids":{"openalex":"https://openalex.org/W2013109830","doi":"https://doi.org/10.1145/1031171.1031194","mag":"2013109830"},"language":"en","primary_location":{"id":"doi:10.1145/1031171.1031194","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1031171.1031194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the thirteenth ACM international conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103731322","display_name":"Marius Pa\u015fca","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marius Pasca","raw_affiliation_strings":["Google Inc., Mountain View, CA"],"affiliations":[{"raw_affiliation_string":"Google Inc., Mountain View, CA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5103731322"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":13.4912,"has_fulltext":false,"cited_by_count":138,"citation_normalized_percentile":{"value":0.98751918,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"137","last_page":"145"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.761903703212738},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6285140514373779},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.6156070232391357},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.543904185295105},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5419860482215881},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5203577876091003},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5055904388427734},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4412725567817688},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4352780878543854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4108749330043793}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.761903703212738},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6285140514373779},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.6156070232391357},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.543904185295105},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5419860482215881},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5203577876091003},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5055904388427734},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4412725567817688},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4352780878543854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4108749330043793},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1031171.1031194","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1031171.1031194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the thirteenth ACM international conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W69597389","https://openalex.org/W97622340","https://openalex.org/W179879056","https://openalex.org/W197270748","https://openalex.org/W1489949474","https://openalex.org/W1576504150","https://openalex.org/W1606142945","https://openalex.org/W1632114991","https://openalex.org/W1982982698","https://openalex.org/W1989966618","https://openalex.org/W2005449026","https://openalex.org/W2007756016","https://openalex.org/W2033709196","https://openalex.org/W2038721957","https://openalex.org/W2068737686","https://openalex.org/W2076002267","https://openalex.org/W2088656180","https://openalex.org/W2100377551","https://openalex.org/W2103931177","https://openalex.org/W2115461474","https://openalex.org/W2122410182","https://openalex.org/W2135843243","https://openalex.org/W2143042932","https://openalex.org/W2167435923","https://openalex.org/W2343954916","https://openalex.org/W2539468319","https://openalex.org/W2608239929","https://openalex.org/W2785349534","https://openalex.org/W2786175397","https://openalex.org/W2786972369","https://openalex.org/W2953149585","https://openalex.org/W3023431232","https://openalex.org/W4235505822","https://openalex.org/W4372267129","https://openalex.org/W6602823196","https://openalex.org/W6603979058","https://openalex.org/W6607278195","https://openalex.org/W6629296869","https://openalex.org/W6678087030"],"related_works":["https://openalex.org/W2078793151","https://openalex.org/W2916255597","https://openalex.org/W3017222382","https://openalex.org/W3128216712","https://openalex.org/W3136915866","https://openalex.org/W2886890203","https://openalex.org/W2983934248","https://openalex.org/W4390279576","https://openalex.org/W4313535650","https://openalex.org/W1605730749"],"abstract_inverted_index":{"The":[0,18,124,138],"recognition":[1,29,149],"of":[2,20,42,46,68,135,194,200],"names":[3,160],"and":[4,16,91,107,181,206,214],"their":[5],"associated":[6],"categories":[7,45,67,193],"within":[8],"unstructured":[9,133],"text":[10,134],"traditionally":[11],"relies":[12],"on":[13,191,209],"semantic":[14],"lexicons":[15,26],"gazetteers.":[17],"amount":[19],"effort":[21],"required":[22],"to":[23,30,71,131,146],"assemble":[24],"large":[25],"confines":[27],"the":[28,66,95,132,192,201],"either":[31],"a":[32,39,55,101,104,113,141],"limited":[33],"domain":[34,169],"(e.g.,":[35,48,82,94],"<i>medical":[36],"imaging</i>),":[37],"or":[38,161],"small":[40],"set":[41],"pre-defined,":[43],"broader":[44],"interest":[47,70],"<i>persons</i>,":[49],"<i>countries</i>,":[50],"<i>organizations</i>,":[51],"<i>products</i>).":[52],"This":[53],"constitutes":[54],"serious":[56],"limitation":[57],"in":[58,121,150,171,203],"an":[59,108],"information":[60],"seeking":[61],"context.":[62],"In":[63],"this":[64],"case,":[65],"potential":[69],"users":[72],"are":[73],"more":[74],"diverse":[75],"(<i>universities</i>,":[76],"<i>agencies</i>,":[77],"<i>retailers</i>,":[78],"<i>celebrities</i>),":[79],"often":[80],"refined":[81],"<i>SLR":[83],"digital":[84],"cameras</i>,":[85],"<i>programming":[86],"languages</i>,":[87],"<i>multinational":[88],"oil":[89],"companies</i>),":[90],"usually":[92],"overlapping":[93],"same":[96],"entity":[97,148],"may":[98],"be":[99],"concurrently":[100],"<i>brand":[102],"name</i>,":[103],"<i>technology":[105],"company</i>,":[106],"<i>industry":[109],"leader</i>).":[110],"We":[111,197],"present":[112],"lightly":[114,179],"supervised":[115],"method":[116,125,139,202],"for":[117],"acquiring":[118],"named":[119,147],"entities":[120],"arbitrary":[122],"categories.":[123],"applies":[126],"lightweight":[127],"lexico-syntactic":[128],"extraction":[129,173],"patterns":[130],"Web":[136,204,212],"documents.":[137],"is":[140,177],"departure":[142],"from":[143],"traditional":[144],"approaches":[145],"that:":[151],"1)":[152],"it":[153,164,176,184],"does":[154,165,185],"not":[155,166,186],"require":[156],"any":[157,168,188],"start-up":[158],"seed":[159],"training;":[162],"2)":[163],"encode":[167],"knowledge":[170],"its":[172],"patterns;":[174],"3)":[175],"only":[178],"supervised,":[180],"data-driven;":[182],"4)":[183],"impose":[187],"a-priori":[189],"restriction":[190],"extracted":[195],"names.":[196],"illustrate":[198],"applications":[199],"search,":[205],"describe":[207],"experiments":[208],"500":[210],"million":[211],"documents":[213],"news":[215],"articles.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":18}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
