{"id":"https://openalex.org/W1976296352","doi":"https://doi.org/10.1109/ccnc.2013.6488526","title":"Focused crawling for building Web comment corpora","display_name":"Focused crawling for building Web comment corpora","publication_year":2013,"publication_date":"2013-01-01","ids":{"openalex":"https://openalex.org/W1976296352","doi":"https://doi.org/10.1109/ccnc.2013.6488526","mag":"1976296352"},"language":"en","primary_location":{"id":"doi:10.1109/ccnc.2013.6488526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccnc.2013.6488526","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 10th Consumer Communications and Networking Conference (CCNC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080762562","display_name":"Melanie Neunerdt","orcid":null},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"M. Neunerdt","raw_affiliation_strings":["Institute for Theoretical Information Technology, RWTH Aachen University, Germany","Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Information Technology, RWTH Aachen University, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018449928","display_name":"Markus Niermann","orcid":"https://orcid.org/0000-0001-8327-8910"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"M. Niermann","raw_affiliation_strings":["Institute for Theoretical Information Technology, RWTH Aachen University, Germany","Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Information Technology, RWTH Aachen University, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049026863","display_name":"Rudolf Mathar","orcid":null},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"R. Mathar","raw_affiliation_strings":["Institute for Theoretical Information Technology, RWTH Aachen University, Germany","Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Information Technology, RWTH Aachen University, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"Inst. for Theor. Inf. Technol., RWTH Aachen Univ., Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075870385","display_name":"Bianka Trevisan","orcid":null},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"B. Trevisan","raw_affiliation_strings":["Textlinguistics/Technical Communications, RWTH Aachen University, Germany","Textlinguistics/Tech. Commun., RWTH Aachen Univ., Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"Textlinguistics/Technical Communications, RWTH Aachen University, Germany","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"Textlinguistics/Tech. Commun., RWTH Aachen Univ., Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080762562"],"corresponding_institution_ids":["https://openalex.org/I887968799"],"apc_list":null,"apc_paid":null,"fwci":5.0155,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.94986796,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"685","last_page":"688"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9488000273704529,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.8508294820785522},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8096718788146973},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.8031724691390991},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.802736222743988},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6620421409606934},{"id":"https://openalex.org/keywords/web-standards","display_name":"Web standards","score":0.6311703324317932},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.5079470276832581},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.5018324851989746},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48821690678596497},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.4631566107273102},{"id":"https://openalex.org/keywords/social-semantic-web","display_name":"Social Semantic Web","score":0.41558754444122314},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.41125011444091797},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.3732919991016388},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.22011396288871765}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.8508294820785522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8096718788146973},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.8031724691390991},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.802736222743988},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6620421409606934},{"id":"https://openalex.org/C182321512","wikidata":"https://www.wikidata.org/wiki/Q1153289","display_name":"Web standards","level":3,"score":0.6311703324317932},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.5079470276832581},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.5018324851989746},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48821690678596497},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.4631566107273102},{"id":"https://openalex.org/C534406577","wikidata":"https://www.wikidata.org/wiki/Q7550843","display_name":"Social Semantic Web","level":3,"score":0.41558754444122314},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.41125011444091797},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.3732919991016388},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.22011396288871765},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ccnc.2013.6488526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccnc.2013.6488526","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 10th Consumer Communications and Networking Conference (CCNC)","raw_type":"proceedings-article"},{"id":"pmh:oai:publications.rwth-aachen.de:197992","is_oa":false,"landing_page_url":"https://publications.rwth-aachen.de/record/197992","pdf_url":null,"source":{"id":"https://openalex.org/S4306401362","display_name":"RWTH Publications (RWTH Aachen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887968799","host_organization_name":"RWTH Aachen University","host_organization_lineage":["https://openalex.org/I887968799"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 10th Annual IEEE Consumer Communications & Networking Conference : CCNC 2013 ; January 11-14, 2013, Las Vegas, Nevada USA<br/>10. Annual IEEE Consumer Communications & Networking Conference : CCNC 2013, CCNC 2013, Las Vegas, NV, USA, 2013-01-11 - 2013-01-14","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320324232","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W256861921","https://openalex.org/W1489992655","https://openalex.org/W1854214752","https://openalex.org/W2017726337","https://openalex.org/W2029341294","https://openalex.org/W2128915886","https://openalex.org/W2138621811","https://openalex.org/W2156093156","https://openalex.org/W2158601853","https://openalex.org/W6628936300","https://openalex.org/W6683142088","https://openalex.org/W6995438373"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2375180657","https://openalex.org/W4248730791","https://openalex.org/W2026132847","https://openalex.org/W1506122440","https://openalex.org/W2137810919","https://openalex.org/W2358310581","https://openalex.org/W2352686120","https://openalex.org/W2019080882","https://openalex.org/W2372594123"],"abstract_inverted_index":{"Web":[0,20,45,56,64,70,101,105],"2.0":[1],"provides":[2],"various":[3,108],"types":[4],"of":[5,24,69],"social":[6],"media":[7],"applications,":[8],"e.g.,":[9],"blogs,":[10],"forums":[11],"and":[12],"news":[13],"sites":[14],"that":[15,103],"allow":[16],"users":[17],"to":[18,43,61],"post":[19],"comments.":[21],"This":[22],"kind":[23],"communication":[25],"plays":[26],"an":[27],"important":[28],"role":[29],"in":[30],"acceptance":[31],"research.":[32],"To":[33],"extract":[34],"different":[35],"opinions":[36],"from":[37,107],"such":[38,49],"data,":[39],"it":[40],"is":[41,72],"necessary":[42],"build":[44,62],"comment":[46],"corpora.":[47],"Building":[48],"corpora":[50],"requires":[51],"focused":[52,55,84],"crawling.":[53],"Many":[54],"crawling":[57],"algorithms":[58],"are":[59],"known":[60],"topic-specific":[63],"collections.":[65],"However,":[66],"the":[67],"type":[68],"pages":[71,102],"typically":[73],"not":[74],"considered.":[75],"In":[76],"this":[77],"paper,":[78],"we":[79],"introduce":[80],"a":[81,88],"new":[82],"type-specific":[83],"crawler,":[85],"which":[86],"uses":[87],"classifier":[89],"based":[90],"on":[91],"HTML":[92],"meta":[93],"information.":[94],"Its":[95],"application":[96],"allows":[97],"for":[98],"collecting":[99],"only":[100],"cover":[104],"comments":[106],"domains.":[109]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
