{"id":"https://openalex.org/W1980086685","doi":"https://doi.org/10.1145/2767135","title":"Improving Researcher Homepage Classification with Unlabeled Data","display_name":"Improving Researcher Homepage Classification with Unlabeled Data","publication_year":2015,"publication_date":"2015-10-19","ids":{"openalex":"https://openalex.org/W1980086685","doi":"https://doi.org/10.1145/2767135","mag":"1980086685"},"language":"en","primary_location":{"id":"doi:10.1145/2767135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2767135","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076157833","display_name":"Sujatha Das Gollapalli","orcid":"https://orcid.org/0000-0002-4567-8937"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sujatha Das Gollapalli","raw_affiliation_strings":["The Pennsylvania State University","(The Pennsylvania State University"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"(The Pennsylvania State University","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089085275","display_name":"Cornelia Caragea","orcid":"https://orcid.org/0000-0002-5664-2163"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cornelia Caragea","raw_affiliation_strings":["University of North Texas, TX, USA"],"affiliations":[{"raw_affiliation_string":"University of North Texas, TX, USA","institution_ids":["https://openalex.org/I123534392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009542542","display_name":"Prasenjit Mitra","orcid":"https://orcid.org/0000-0002-7530-9497"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prasenjit Mitra","raw_affiliation_strings":["The Pennsylvania State University, PA, USA","The Pennsylvania State University, PA, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, PA, USA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"The Pennsylvania State University, PA, USA#TAB#","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["The Pennsylvania State University, PA, USA","The Pennsylvania State University, PA, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, PA, USA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"The Pennsylvania State University, PA, USA#TAB#","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5076157833"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":3.1784,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.92590835,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"9","issue":"4","first_page":"1","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8277698755264282},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.7027193307876587},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5237831473350525},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5013985633850098},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4828910827636719},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4507541358470917},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4489792585372925},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44324973225593567},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.442394882440567},{"id":"https://openalex.org/keywords/co-training","display_name":"Co-training","score":0.4298645257949829},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3545982837677002},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.19339227676391602}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8277698755264282},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.7027193307876587},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5237831473350525},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5013985633850098},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4828910827636719},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4507541358470917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4489792585372925},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44324973225593567},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.442394882440567},{"id":"https://openalex.org/C2776959682","wikidata":"https://www.wikidata.org/wiki/Q17005296","display_name":"Co-training","level":3,"score":0.4298645257949829},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3545982837677002},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.19339227676391602},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2767135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2767135","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309090","display_name":"Center for Hierarchical Manufacturing, National Science Foundation","ror":"https://ror.org/043trmd87"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W71104953","https://openalex.org/W87822204","https://openalex.org/W122956856","https://openalex.org/W173995639","https://openalex.org/W202303397","https://openalex.org/W1489992655","https://openalex.org/W1501667924","https://openalex.org/W1506806321","https://openalex.org/W1528620860","https://openalex.org/W1532325895","https://openalex.org/W1550206324","https://openalex.org/W1563088657","https://openalex.org/W1570448133","https://openalex.org/W1595222087","https://openalex.org/W1889038981","https://openalex.org/W1983531058","https://openalex.org/W2003471189","https://openalex.org/W2013761541","https://openalex.org/W2022322548","https://openalex.org/W2029341294","https://openalex.org/W2037603696","https://openalex.org/W2038721957","https://openalex.org/W2048679005","https://openalex.org/W2059586463","https://openalex.org/W2067698488","https://openalex.org/W2070996757","https://openalex.org/W2081580037","https://openalex.org/W2087787226","https://openalex.org/W2097083044","https://openalex.org/W2097089247","https://openalex.org/W2097580385","https://openalex.org/W2101210369","https://openalex.org/W2103333826","https://openalex.org/W2104660959","https://openalex.org/W2104772551","https://openalex.org/W2107008379","https://openalex.org/W2111700528","https://openalex.org/W2121702856","https://openalex.org/W2122052811","https://openalex.org/W2125327503","https://openalex.org/W2128614648","https://openalex.org/W2130062883","https://openalex.org/W2133348086","https://openalex.org/W2133990480","https://openalex.org/W2134491992","https://openalex.org/W2136504847","https://openalex.org/W2137313854","https://openalex.org/W2139578439","https://openalex.org/W2149490995","https://openalex.org/W2153635508","https://openalex.org/W2154368244","https://openalex.org/W2156772624","https://openalex.org/W2156909104","https://openalex.org/W2161920802","https://openalex.org/W2169899598","https://openalex.org/W2171629518","https://openalex.org/W2215421138","https://openalex.org/W2435251607","https://openalex.org/W2560674852","https://openalex.org/W2584997055","https://openalex.org/W2589887588","https://openalex.org/W2591957553","https://openalex.org/W2604738573","https://openalex.org/W2736801446","https://openalex.org/W2966207845","https://openalex.org/W3005526988","https://openalex.org/W3010865323","https://openalex.org/W3029645440","https://openalex.org/W3041537557","https://openalex.org/W4231934124","https://openalex.org/W4299828299"],"related_works":["https://openalex.org/W4393220254","https://openalex.org/W4321258516","https://openalex.org/W2051833850","https://openalex.org/W4287845917","https://openalex.org/W3156164993","https://openalex.org/W2385015894","https://openalex.org/W2171573941","https://openalex.org/W4360873893","https://openalex.org/W4390135167","https://openalex.org/W2165396616"],"abstract_inverted_index":{"A":[0],"classifier":[1,24],"that":[2,25,63,182,208,285,296,337,351,363],"determines":[3],"if":[4],"a":[5,10,16,23,172,177,190,226,267,283,316,370],"webpage":[6],"is":[7,26,266,282,319],"relevant":[8],"to":[9,28,35,83,87,115,152,188,235,262,304,323],"specified":[11],"set":[12,231],"of":[13,43,56,69,142,180,225,232,238,292],"topics":[14],"comprises":[15],"key":[17],"component":[18],"for":[19,90,133,175,214,242,270,289,332],"focused":[20],"crawling.":[21],"Can":[22],"tuned":[27],"perform":[29],"well":[30],"on":[31,46,66,73,76,164],"training":[32],"datasets":[33,68,86],"continue":[34],"filter":[36],"out":[37],"irrelevant":[38],"pages":[39],"in":[40,53,94,128,161,197,222,258,308,343,357],"the":[41,47,54,91,99,107,137,165,195,200,223,236,277,347,354,358],"face":[42],"changing":[44],"content":[45,365],"Web?":[48],"We":[49,60,120,169,206,294,335],"investigate":[50],"this":[51,95,209],"question":[52],"context":[55],"identifying":[57,309],"researcher":[58,117],"homepages.":[59,135,244,264,310],"show":[61,295,336],"experimentally":[62],"classifiers":[64,89],"trained":[65],"existing":[67],"academic":[70,78,113,167,263],"homepages":[71],"underperform":[72],"\u201cnon-homepages\u201d":[74],"present":[75],"current-day":[77,166],"websites.":[79,168],"As":[80],"an":[81],"alternative":[82],"obtaining":[84],"labeled":[85],"retrain":[88],"new":[92],"content,":[93],"article":[96],"we":[97,104,246,327],"ask":[98],"following":[100],"question:":[101],"\u201cHow":[102],"can":[103,144,218,298],"effectively":[105,153,300],"use":[106,126,154],"unlabeled":[108,155,348],"data":[109,156,278],"readily":[110],"available":[111],"from":[112,199,276,346],"websites":[114],"improve":[116],"homepage":[118,162,333],"classification?\u201d":[119],"design":[121],"novel":[122,173],"URL-based":[123],"features":[124,132,143,275,368],"and":[125,157,217,251,257,273,366],"them":[127],"conjunction":[129,259],"with":[130,260,302],"content-based":[131],"representing":[134],"Within":[136],"co-training":[138,216,261,303,350],"framework,":[139],"these":[140],"sets":[141],"be":[145,219,299],"treated":[146],"as":[147],"complementary":[148],"views":[149,202],"enabling":[150],"us":[151],"obtain":[158,305],"remarkable":[159],"improvements":[160,307],"identification":[163],"also":[170],"propose":[171],"technique":[174,269,284],"\u201clearning":[176],"conforming":[178],"pair":[179],"classifiers\u201d":[181],"mimics":[183],"co-training.":[184,205],"Our":[185,229],"algorithm":[186],"seeks":[187],"minimize":[189],"loss":[191,210],"(objective)":[192],"function":[193],"quantifying":[194],"difference":[196],"predictions":[198],"two":[201],"afforded":[203],"by":[204],"argue":[207],"formulation":[211],"provides":[212],"insights":[213],"understanding":[215],"used":[220],"even":[221],"absence":[224],"validation":[227],"dataset.":[228],"next":[230],"findings":[233],"pertains":[234],"evaluation":[237],"other":[239,329],"state-of-the-art":[240],"techniques":[241,255],"classifying":[243],"First,":[245],"apply":[247],"feature":[248,252,314,324,355],"selection":[249],"(FS)":[250],"hashing":[253],"(FH)":[254],"independently":[256],"FS":[265,297],"well-known":[268],"removing":[271],"redundant":[272],"unnecessary":[274],"representation,":[279],"whereas":[280],"FH":[281],"uses":[286],"hash":[287],"functions":[288],"efficient":[290],"encoding":[291],"features.":[293],"combined":[301],"further":[306],"However,":[311],"using":[312,344],"hashed":[313],"representations,":[315],"performance":[317],"degradation":[318],"observed":[320],"possibly":[321],"due":[322],"collisions.":[325],"Finally,":[326],"evaluate":[328],"semisupervised":[330],"algorithms":[331,340],"classification.":[334],"although":[338],"several":[339],"are":[341],"effective":[342],"information":[345],"instances,":[349],"explicitly":[352],"harnesses":[353],"split":[356],"underlying":[359],"instances":[360],"outperforms":[361],"approaches":[362],"combine":[364],"URL":[367],"into":[369],"single":[371],"view.":[372]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
