{"id":"https://openalex.org/W2048606452","doi":"https://doi.org/10.4018/jdls.2010100102","title":"Sampling the Web as Training Data for Text Classification","display_name":"Sampling the Web as Training Data for Text Classification","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2048606452","doi":"https://doi.org/10.4018/jdls.2010100102","mag":"2048606452"},"language":"en","primary_location":{"id":"doi:10.4018/jdls.2010100102","is_oa":false,"landing_page_url":"https://doi.org/10.4018/jdls.2010100102","pdf_url":null,"source":{"id":"https://openalex.org/S143974849","display_name":"International Journal of Digital Library Systems","issn_l":"1947-9077","issn":["1947-9077","1947-9085"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320424","host_organization_name":"IGI Global","host_organization_lineage":["https://openalex.org/P4310320424"],"host_organization_lineage_names":["IGI Global"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Digital Library Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023286205","display_name":"Wei-Yen Day","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Wei-Yen Day","raw_affiliation_strings":["National Taiwan University, Taiwan","National Taiwan Univ (Taiwan)"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan Univ (Taiwan)","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048246086","display_name":"Chun-Yi Chi","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chun-Yi Chi","raw_affiliation_strings":["National Taiwan University, Taiwan","National Taiwan Univ (Taiwan)"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan Univ (Taiwan)","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007897863","display_name":"Ruey-Cheng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ruey-Cheng Chen","raw_affiliation_strings":["National Taiwan University, Taiwan","National Taiwan Univ (Taiwan)"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan Univ (Taiwan)","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071684622","display_name":"Pu\u2010Jen Cheng","orcid":"https://orcid.org/0000-0001-5892-0385"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Pu-Jen Cheng","raw_affiliation_strings":["National Taiwan University, Taiwan","National Taiwan Univ (Taiwan)"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan Univ (Taiwan)","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023286205"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.10777136,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1","issue":"4","first_page":"24","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8648651838302612},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6454824805259705},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6275745630264282},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5705065727233887},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5699599385261536},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5647045373916626},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5089006423950195},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49530425667762756},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.419016569852829},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37554118037223816},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3621435761451721}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8648651838302612},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6454824805259705},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6275745630264282},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5705065727233887},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5699599385261536},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5647045373916626},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5089006423950195},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49530425667762756},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.419016569852829},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37554118037223816},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3621435761451721},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.4018/jdls.2010100102","is_oa":false,"landing_page_url":"https://doi.org/10.4018/jdls.2010100102","pdf_url":null,"source":{"id":"https://openalex.org/S143974849","display_name":"International Journal of Digital Library Systems","issn_l":"1947-9077","issn":["1947-9077","1947-9085"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320424","host_organization_name":"IGI Global","host_organization_lineage":["https://openalex.org/P4310320424"],"host_organization_lineage_names":["IGI Global"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Digital Library Systems","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:igg:jdls00:v:1:y:2010:i:4:p:24-42","is_oa":false,"landing_page_url":"http://services.igi-global.com/resolvedoi/resolve.aspx?doi=10.4018/jdls.2010100102","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W3035862","https://openalex.org/W22461475","https://openalex.org/W1566116839","https://openalex.org/W1979459060","https://openalex.org/W1987996059","https://openalex.org/W2005422315","https://openalex.org/W2034117506","https://openalex.org/W2049770399","https://openalex.org/W2069484355","https://openalex.org/W2070920590","https://openalex.org/W2097089247","https://openalex.org/W2101246646","https://openalex.org/W2114535528","https://openalex.org/W2129611069","https://openalex.org/W2139422812","https://openalex.org/W2156772624","https://openalex.org/W2158295392","https://openalex.org/W4231856373","https://openalex.org/W4244487999"],"related_works":["https://openalex.org/W3172695526","https://openalex.org/W2981877337","https://openalex.org/W3203938600","https://openalex.org/W4286910063","https://openalex.org/W2163707935","https://openalex.org/W83146503","https://openalex.org/W2169074127","https://openalex.org/W202723009","https://openalex.org/W2145955964","https://openalex.org/W2188612292"],"abstract_inverted_index":{"Data":[0],"acquisition":[1],"is":[2,58,84,96],"a":[3,49],"major":[4],"concern":[5],"in":[6,81,145,150],"text":[7],"classification.":[8],"The":[9,55,75,136],"excessive":[10],"human":[11],"efforts":[12],"required":[13],"by":[14,44],"conventional":[15,147],"methods":[16,79,124],"to":[17,28,39,59,68,161],"build":[18],"up":[19],"quality":[20],"training":[21,42,73,134],"collection":[22],"might":[23],"not":[24],"always":[25],"be":[26],"available":[27],"research":[29],"workers.":[30],"In":[31],"this":[32,82,163],"paper,":[33],"the":[34,46,88,94,100,122,146],"authors":[35],"look":[36],"into":[37],"possibilities":[38],"automatically":[40],"collect":[41],"data":[43],"sampling":[45,87,99],"Web":[47,141],"with":[48],"set":[50],"of":[51,77,108,152],"given":[52],"class":[53],"names.":[54],"basic":[56],"idea":[57],"populate":[60],"appropriate":[61],"keywords":[62],"and":[63,93,118,154],"submit":[64],"them":[65],"as":[66],"queries":[67],"search":[69],"engines":[70],"for":[71,103,139],"acquiring":[72],"data.":[74,135],"first":[76],"two":[78,115],"presented":[80],"paper":[83],"based":[85,97],"on":[86,98,114],"common":[89],"concepts":[90,102],"among":[91],"classes":[92],"other":[95],"discriminative":[101],"each":[104],"class.":[105],"A":[106],"series":[107],"experiments":[109],"were":[110],"carried":[111],"out":[112],"independently":[113],"different":[116],"datasets":[117],"results":[119],"show":[120],"that":[121],"proposed":[123],"significantly":[125],"improve":[126],"classifier":[127],"performance":[128],"even":[129],"without":[130],"using":[131],"manually":[132],"labeled":[133],"authors\u2019":[137],"strategy":[138],"retrieving":[140],"samples":[142],"substantially":[143],"helps":[144],"document":[148],"classification":[149],"terms":[151],"accuracy":[153],"efficiency.":[155],"Request":[156],"access":[157],"from":[158],"your":[159],"librarian":[160],"read":[162],"article's":[164],"full":[165],"text.":[166]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
