{"id":"https://openalex.org/W1993202648","doi":"https://doi.org/10.1145/1135777.1135870","title":"Large-scale text categorization by batch mode active learning","display_name":"Large-scale text categorization by batch mode active learning","publication_year":2006,"publication_date":"2006-05-23","ids":{"openalex":"https://openalex.org/W1993202648","doi":"https://doi.org/10.1145/1135777.1135870","mag":"1993202648"},"language":"en","primary_location":{"id":"doi:10.1145/1135777.1135870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135870","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074834854","display_name":"Steven C. H. Hoi","orcid":"https://orcid.org/0000-0002-4584-3453"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Steven C. H. Hoi","raw_affiliation_strings":["The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069394608","display_name":"Rong Jin","orcid":"https://orcid.org/0000-0002-8797-4646"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rong Jin","raw_affiliation_strings":["Michigan State University, East Lansing, MI","Michigan State University East Lansing, MI"],"affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, MI","institution_ids":["https://openalex.org/I87216513"]},{"raw_affiliation_string":"Michigan State University East Lansing, MI","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069596903","display_name":"Michael R. Lyu","orcid":"https://orcid.org/0000-0002-3666-5798"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Michael R. Lyu","raw_affiliation_strings":["The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5074834854"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":18.1635,"has_fulltext":false,"cited_by_count":255,"citation_normalized_percentile":{"value":0.99272013,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"633","last_page":"642"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7671527862548828},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6840358972549438},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5867466926574707},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.5066841244697571},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.48733434081077576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45763543248176575},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45469680428504944},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.15724864602088928},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06879982352256775}],"concepts":[{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7671527862548828},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6840358972549438},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5867466926574707},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.5066841244697571},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.48733434081077576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45763543248176575},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45469680428504944},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.15724864602088928},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06879982352256775},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/1135777.1135870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135870","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-3390","is_oa":false,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/2390","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1145/1135777.1135870","raw_type":"Conference Proceeding Article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.134.2839","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.134.2839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.msu.edu/~rongjin/publications/www2006.final.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.421.1269","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.421.1269","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cais.ntu.edu.sg/~chhoi/paper_pdf/WWW06BMAL.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.79.3232","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.3232","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.cuhk.hk/~lyu/paper_pdf/fp598-hoi.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320335138","display_name":"Shun Hing Institute of Advanced Engineering","ror":"https://ror.org/00t33hh48"},{"id":"https://openalex.org/F4320336769","display_name":"Institute of Engineering Research, Seoul National University","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W107306860","https://openalex.org/W155555924","https://openalex.org/W1483816357","https://openalex.org/W1484084878","https://openalex.org/W1507179106","https://openalex.org/W1514707997","https://openalex.org/W1514940655","https://openalex.org/W1515450954","https://openalex.org/W1528361845","https://openalex.org/W1574877594","https://openalex.org/W1576520375","https://openalex.org/W1604938182","https://openalex.org/W1751470192","https://openalex.org/W1967073510","https://openalex.org/W1993934121","https://openalex.org/W2005422315","https://openalex.org/W2020316999","https://openalex.org/W2023607075","https://openalex.org/W2080021732","https://openalex.org/W2085989833","https://openalex.org/W2094934653","https://openalex.org/W2107008379","https://openalex.org/W2114535528","https://openalex.org/W2115305054","https://openalex.org/W2122837498","https://openalex.org/W2127809819","https://openalex.org/W2128073546","https://openalex.org/W2135293965","https://openalex.org/W2136504847","https://openalex.org/W2140214528","https://openalex.org/W2148603752","https://openalex.org/W2149684865","https://openalex.org/W2154159734","https://openalex.org/W2156622608","https://openalex.org/W2169899598","https://openalex.org/W2296319761","https://openalex.org/W2435251607","https://openalex.org/W2492794003","https://openalex.org/W2560674852","https://openalex.org/W2798593669","https://openalex.org/W2890534020","https://openalex.org/W2999905431","https://openalex.org/W3171280080","https://openalex.org/W4212774754","https://openalex.org/W4229666556","https://openalex.org/W4285719527","https://openalex.org/W4404299545","https://openalex.org/W6604296810","https://openalex.org/W6628964739","https://openalex.org/W6634442568","https://openalex.org/W6665018949","https://openalex.org/W6683368480","https://openalex.org/W6750605398","https://openalex.org/W6992441829","https://openalex.org/W7071374342"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2390857744","https://openalex.org/W2133651098","https://openalex.org/W2390698788","https://openalex.org/W2078570174","https://openalex.org/W2035261173","https://openalex.org/W2376836843","https://openalex.org/W2125109223","https://openalex.org/W2383063829","https://openalex.org/W2082678934"],"abstract_inverted_index":{"Large-scale":[0],"text":[1,18,29,51,82,110,198,208],"categorization":[2,19,83,199,209],"is":[3,20,93,126,188],"an":[4],"important":[5],"research":[6],"topic":[7],"for":[8,31,61,112,143,197,210],"Web":[9,213],"data":[10],"mining.":[11],"One":[12],"of":[13,65,109,120,158,165,173],"the":[14,24,37,57,81,121,130,133,151,156,163,170,192],"challenges":[15],"in":[16,27,75,115],"large-scale":[17],"how":[21,127],"to":[22,49,55,86,128,167],"reduce":[23,129],"human":[25],"efforts":[26],"labeling":[28,62,113],"documents":[30,60,111,166],"building":[32],"reliable":[33],"classification":[34,175],"models.":[35],"In":[36,95],"past,":[38],"there":[39],"have":[40,183],"been":[41],"many":[42],"studies":[43,67],"on":[44,69],"applying":[45],"active":[46,102,124,194],"learning":[47,103,125,195],"methods":[48],"automatic":[50],"categorization,":[52],"which":[53],"try":[54],"select":[56],"most":[58],"informative":[59],"manually.":[63],"Most":[64],"these":[66],"focused":[68],"selecting":[70],"a":[71,79,100,107,174,203],"single":[72],"unlabeled":[73],"document":[74,92],"each":[76,90,116,138],"iteration.":[77,117],"As":[78],"result,":[80],"model":[84,144,159],"has":[85],"be":[87,202],"retrained":[88],"after":[89],"labeled":[91],"solicited.":[94],"this":[96,147],"paper,":[97],"we":[98,149],"present":[99],"novel":[101],"algorithm":[104,187],"that":[105,137,185],"selects":[106],"batch":[108,122],"manually":[114],"The":[118],"key":[119],"mode":[123],"redundancy":[131],"among":[132],"selected":[134],"examples":[135],"such":[136],"example":[139],"provides":[140],"unique":[141],"information":[142,153,172],"updating.":[145],"To":[146],"end,":[148],"use":[150],"Fisher":[152,171],"matrix":[154],"as":[155],"measurement":[157],"uncertainty":[160],"and":[161,200],"choose":[162],"set":[164],"effectively":[168],"maximize":[169],"model.":[176],"Extensive":[177],"experiments":[178],"with":[179],"three":[180],"different":[181],"datasets":[182],"shown":[184],"our":[186],"more":[189],"effective":[190],"than":[191],"state-of-the-art":[193],"techniques":[196],"can":[201],"promising":[204],"tool":[205],"toward":[206],"largescale":[207],"World":[211],"Wide":[212],"documents.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":16},{"year":2017,"cited_by_count":12},{"year":2016,"cited_by_count":17},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":11},{"year":2013,"cited_by_count":11},{"year":2012,"cited_by_count":30}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
