{"id":"https://openalex.org/W2033010423","doi":"https://doi.org/10.1145/2348283.2348494","title":"On automatically tagging web documents from examples","display_name":"On automatically tagging web documents from examples","publication_year":2012,"publication_date":"2012-08-12","ids":{"openalex":"https://openalex.org/W2033010423","doi":"https://doi.org/10.1145/2348283.2348494","mag":"2033010423"},"language":"en","primary_location":{"id":"doi:10.1145/2348283.2348494","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2348283.2348494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050504230","display_name":"Nicholas Woodward","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nicholas Joel Woodward","raw_affiliation_strings":["University of Texas at Austin, Austin, TX, USA","University of Texas at Austin, Austin, TX USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"University of Texas at Austin, Austin, TX USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101883722","display_name":"Weijia Xu","orcid":"https://orcid.org/0000-0002-5134-6381"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weijia Xu","raw_affiliation_strings":["University of Texas at Austin, Austin, TX, USA","University of Texas at Austin, Austin, TX USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"University of Texas at Austin, Austin, TX USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079862411","display_name":"Kent Norsworthy","orcid":"https://orcid.org/0000-0002-2951-3665"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kent Norsworthy","raw_affiliation_strings":["University of Texas at Austin, Austin, TX, USA","University of Texas at Austin, Austin, TX USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"University of Texas at Austin, Austin, TX USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5050504230"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09341225,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1111","last_page":"1112"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8631159067153931},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7011486291885376},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5369040966033936},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.50849449634552},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.4798707067966461},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4473770558834076},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4213939309120178},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33731287717819214},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3131883442401886},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19861066341400146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8631159067153931},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7011486291885376},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5369040966033936},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.50849449634552},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.4798707067966461},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4473770558834076},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4213939309120178},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33731287717819214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3131883442401886},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19861066341400146},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2348283.2348494","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2348283.2348494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5099999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W23905605","https://openalex.org/W2059586463","https://openalex.org/W2071018795","https://openalex.org/W2078323847","https://openalex.org/W2096041903","https://openalex.org/W2158698691"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W97075385","https://openalex.org/W2357523926"],"abstract_inverted_index":{"An":[0],"emerging":[1],"need":[2],"in":[3,47,159],"information":[4],"retrieval":[5,29,189],"is":[6,38],"to":[7,14,24,129,156,167],"identify":[8],"a":[9,59,63,87,115,137,153],"set":[10,85,88,155],"of":[11,27,52,71,89,105,188],"documents":[12,53,158],"conforming":[13],"an":[15],"abstract":[16],"description.":[17],"This":[18],"task":[19],"presents":[20],"two":[21],"major":[22],"challenges":[23],"existing":[25,90],"methods":[26],"document":[28,73,106],"and":[30,50,103,133],"classification.":[31],"First,":[32],"similarity":[33],"based":[34,77,141],"on":[35,78,142],"overall":[36],"content":[37,49],"less":[39],"effective":[40],"because":[41],"there":[42],"may":[43],"be":[44,75,94],"great":[45],"variance":[46],"both":[48],"subject":[51],"produced":[54],"for":[55,171,185],"similar":[56],"functions,":[57],"e.g.":[58],"presidential":[60],"speech":[61],"or":[62,81],"government":[64],"ministry":[65],"white":[66],"paper.":[67],"Second,":[68],"the":[69,72,82,100,122,160],"function":[70],"can":[74],"defined":[76],"user":[79],"interests":[80],"specific":[83],"data":[84,120,174],"through":[86],"examples,":[91],"which":[92],"cannot":[93],"described":[95],"with":[96,182],"standard":[97],"categories.":[98],"Additionally,":[99],"increasing":[101],"volume":[102],"complexity":[104],"collections":[107],"demands":[108],"new":[109,138],"scalable":[110],"computational":[111],"solutions.":[112],"We":[113,135],"conducted":[114],"case":[116],"study":[117],"using":[118],"web-archived":[119],"from":[121,152],"Latin":[123],"American":[124],"Government":[125],"Documents":[126],"Archive":[127],"(LAGDA)":[128],"illustrate":[130],"these":[131],"problems":[132],"challenges.":[134],"propose":[136],"hybrid":[139],"approach":[140,163],"Na\u00efve":[143],"Bayes":[144],"inference":[145],"that":[146],"uses":[147],"mixed":[148],"n-gram":[149],"models":[150],"obtained":[151],"training":[154],"classify":[157],"corpus.":[161],"The":[162,176],"has":[164],"been":[165],"developed":[166],"exploit":[168],"parallel":[169],"processing":[170],"large":[172],"scale":[173],"set.":[175],"preliminary":[177],"work":[178],"shows":[179],"promising":[180],"results":[181],"improved":[183],"accuracy":[184],"this":[186],"type":[187],"problem.":[190]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
