{"id":"https://openalex.org/W4231961336","doi":"https://doi.org/10.1109/asonam.2014.6921661","title":"Semantic crawling: An approach based on Named Entity Recognition","display_name":"Semantic crawling: An approach based on Named Entity Recognition","publication_year":2014,"publication_date":"2014-08-01","ids":{"openalex":"https://openalex.org/W4231961336","doi":"https://doi.org/10.1109/asonam.2014.6921661"},"language":"en","primary_location":{"id":"doi:10.1109/asonam.2014.6921661","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asonam.2014.6921661","pdf_url":null,"source":{"id":"https://openalex.org/S4363608209","display_name":"2014 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM 2014)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM 2014)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113432375","display_name":"Giulia Di Pietro","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121291","display_name":"Synthema (Italy)","ror":"https://ror.org/02mnzkw65","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210121291"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Giulia Di Pietro","raw_affiliation_strings":["Synthema srl, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Synthema srl, Pisa, Italy","institution_ids":["https://openalex.org/I4210121291"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030910983","display_name":"Carlo Aliprandi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121291","display_name":"Synthema (Italy)","ror":"https://ror.org/02mnzkw65","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210121291"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Carlo Aliprandi","raw_affiliation_strings":["Synthema srl, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Synthema srl, Pisa, Italy","institution_ids":["https://openalex.org/I4210121291"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082644534","display_name":"Antonio E. De Luca","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121291","display_name":"Synthema (Italy)","ror":"https://ror.org/02mnzkw65","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210121291"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Antonio E. De Luca","raw_affiliation_strings":["Synthema srl, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Synthema srl, Pisa, Italy","institution_ids":["https://openalex.org/I4210121291"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082926983","display_name":"M Raffaelli","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121291","display_name":"Synthema (Italy)","ror":"https://ror.org/02mnzkw65","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210121291"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Raffaelli","raw_affiliation_strings":["Synthema srl, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Synthema srl, Pisa, Italy","institution_ids":["https://openalex.org/I4210121291"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044723082","display_name":"Tiziana Soru","orcid":null},"institutions":[{"id":"https://openalex.org/I4210121291","display_name":"Synthema (Italy)","ror":"https://ror.org/02mnzkw65","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210121291"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Tiziana Soru","raw_affiliation_strings":["Synthema srl, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Synthema srl, Pisa, Italy","institution_ids":["https://openalex.org/I4210121291"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113432375"],"corresponding_institution_ids":["https://openalex.org/I4210121291"],"apc_list":null,"apc_paid":null,"fwci":0.40901456,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.7240865,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"51","issue":null,"first_page":"695","last_page":"699"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.8852928876876831},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8173815608024597},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.6474491357803345},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.5929186344146729},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5710203647613525},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5664565563201904},{"id":"https://openalex.org/keywords/principal","display_name":"Principal (computer security)","score":0.42101263999938965},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.4153573215007782},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3450356721878052},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.18151086568832397},{"id":"https://openalex.org/keywords/web-server","display_name":"Web server","score":0.1052786111831665}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.8852928876876831},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8173815608024597},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.6474491357803345},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.5929186344146729},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5710203647613525},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5664565563201904},{"id":"https://openalex.org/C144559511","wikidata":"https://www.wikidata.org/wiki/Q2986279","display_name":"Principal (computer security)","level":2,"score":0.42101263999938965},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.4153573215007782},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3450356721878052},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.18151086568832397},{"id":"https://openalex.org/C11392498","wikidata":"https://www.wikidata.org/wiki/Q11288","display_name":"Web server","level":3,"score":0.1052786111831665},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asonam.2014.6921661","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asonam.2014.6921661","pdf_url":null,"source":{"id":"https://openalex.org/S4363608209","display_name":"2014 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM 2014)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM 2014)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1495997854","https://openalex.org/W1651655586","https://openalex.org/W2124673015","https://openalex.org/W2142216989","https://openalex.org/W2774768657","https://openalex.org/W6629744990","https://openalex.org/W6678701036","https://openalex.org/W6746926745","https://openalex.org/W7057732734"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2375180657","https://openalex.org/W2042034567","https://openalex.org/W2026132847","https://openalex.org/W1506122440","https://openalex.org/W2137810919","https://openalex.org/W2358310581","https://openalex.org/W2274831913","https://openalex.org/W2352686120","https://openalex.org/W2019080882"],"abstract_inverted_index":{"Law":[0],"Enforcement":[1],"Agencies":[2],"(LEAs)":[3],"are":[4],"increasingly":[5],"more":[6],"reliant":[7],"on":[8],"information":[9,27,195],"and":[10,13,24,35,41,60,105,144,150,249],"communication":[11],"technologies":[12],"affected":[14],"by":[15,19,57],"a":[16,69,87,123,126,134,155,162,178,236],"society":[17],"shaped":[18],"the":[20,52,84,93,108,142,146,165,171,183,194,199,204,208,215,225],"Internet.":[21],"The":[22,113],"richness":[23],"quantity":[25],"of":[26,62,86,110,129,158,170,217],"available":[28],"from":[29,46,122,161],"open":[30],"sources,":[31],"if":[32],"properly":[33],"gathered":[34],"processed,":[36],"can":[37],"provide":[38],"valuable":[39],"intelligence":[40,53],"help":[42],"in":[43,72,141,145],"drawing":[44],"inferences":[45],"existing":[47],"closed":[48],"source":[49],"intelligence.":[50],"Today":[51],"cycle":[54],"is":[55,185,212],"characterized":[56],"manual":[58],"collection":[59,248],"integration":[61,250],"data.":[63],"Named":[64],"Entity":[65],"Recognition":[66],"(NER)":[67],"plays":[68],"fundamental":[70],"role":[71],"Open":[73],"Source":[74],"Intelligence":[75],"(OSINT)":[76],"solutions":[77],"when":[78],"fighting":[79],"crime.":[80],"This":[81,211],"paper":[82,222],"describes":[83],"implementation":[85],"NER-based":[88,227],"focused":[89,240],"web":[90],"crawler":[91,114,184],"under":[92],"EU":[94],"FP7":[95],"Security":[96],"Research":[97],"Project":[98],"CAPER":[99,226],"(Collaborative":[100],"information,":[101],"Acquisition,":[102],"Processing,":[103],"Exploitation":[104],"Reporting":[106],"for":[107,119,154,239],"prevention":[109],"organized":[111],"crime).":[112],"allows":[115],"1.":[116],"to":[117,138,152,168,187,198,234,244],"look":[118,153],"documents":[120,159,191],"starting":[121,160],"URL":[124],"until":[125],"parametric":[127,156],"depth":[128],"levels":[130],"-":[131,149],"also":[132],"specifying":[133],"keyword":[135,163,166,206],"that":[136,192],"has":[137,231],"be":[139,235],"contained":[140],"page":[143],"related":[147],"links":[148],"2.":[151],"number":[157],"(entrusting":[164],"search":[167,173],"one":[169],"principal":[172],"engines,":[174],"thus":[175],"behaving":[176],"as":[177],"meta-search":[179],"engine).":[180],"In":[181,220],"addition,":[182],"able":[186],"retrieve":[188],"only":[189],"those":[190],"contain":[193],"semantically":[196],"relevant":[197],"query":[200],"(in":[201],"other":[202],"words:":[203],"required":[205,209],"with":[207],"sense).":[210],"achieved":[213],"through":[214],"use":[216],"NER":[218],"technologies.":[219],"this":[221],"we":[223],"present":[224],"Semantic":[228],"Crawler,":[229],"which":[230],"been":[232],"proven":[233],"suitable":[237],"tool":[238],"crawling,":[241],"allowing":[242],"LEAs":[243],"drastically":[245],"reduce":[246],"data":[247],"efforts.":[251]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
