{"id":"https://openalex.org/W3041122072","doi":"https://doi.org/10.24963/ijcai.2020/655","title":"Learning URI Selection Criteria to Improve the Crawling of Linked Open Data (Extended Abstract)","display_name":"Learning URI Selection Criteria to Improve the Crawling of Linked Open Data (Extended Abstract)","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3041122072","doi":"https://doi.org/10.24963/ijcai.2020/655","mag":"3041122072"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2020/655","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/655","pdf_url":"https://www.ijcai.org/proceedings/2020/0655.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2020/0655.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048448341","display_name":"Hai Huang","orcid":"https://orcid.org/0000-0003-1412-0567"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Hai Huang","raw_affiliation_strings":["Inria"],"affiliations":[{"raw_affiliation_string":"Inria","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044946197","display_name":"Fabien Gandon","orcid":"https://orcid.org/0000-0003-0543-1232"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Fabien Gandon","raw_affiliation_strings":["INRIA"],"affiliations":[{"raw_affiliation_string":"INRIA","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048448341"],"corresponding_institution_ids":["https://openalex.org/I1326498283"],"apc_list":null,"apc_paid":null,"fwci":0.28307364,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.63515845,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"4730","last_page":"4734"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.83411705493927},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.7780762910842896},{"id":"https://openalex.org/keywords/rdf","display_name":"RDF","score":0.6524877548217773},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6468491554260254},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.6235046982765198},{"id":"https://openalex.org/keywords/linked-data","display_name":"Linked data","score":0.5806430578231812},{"id":"https://openalex.org/keywords/bloom-filter","display_name":"Bloom filter","score":0.5485504269599915},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.5038003325462341},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.486203134059906},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.47991809248924255},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.47980573773384094},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.44719433784484863},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.44220292568206787},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.43989208340644836},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4300858974456787},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3796539604663849},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.33557045459747314},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.25065457820892334},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.24015921354293823},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2109658122062683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83411705493927},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.7780762910842896},{"id":"https://openalex.org/C147497476","wikidata":"https://www.wikidata.org/wiki/Q54872","display_name":"RDF","level":3,"score":0.6524877548217773},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6468491554260254},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.6235046982765198},{"id":"https://openalex.org/C69075417","wikidata":"https://www.wikidata.org/wiki/Q515701","display_name":"Linked data","level":3,"score":0.5806430578231812},{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.5485504269599915},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.5038003325462341},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.486203134059906},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.47991809248924255},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.47980573773384094},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.44719433784484863},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.44220292568206787},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.43989208340644836},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4300858974456787},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3796539604663849},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.33557045459747314},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.25065457820892334},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.24015921354293823},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2109658122062683},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.24963/ijcai.2020/655","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/655","pdf_url":"https://www.ijcai.org/proceedings/2020/0655.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:gala.gre.ac.uk:36599","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401244","display_name":"Greenwich Academic Literature Archive (University of Greenwich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I55060895","host_organization_name":"University of Greenwich","host_organization_lineage":["https://openalex.org/I55060895"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference Proceedings"},{"id":"pmh:oai:HAL:hal-03064912v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-03064912","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IJCAI 2020 - 29th International Joint Conference on Artificial Intelligence, Jan 2021, Yokohama, Japan","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2020/655","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/655","pdf_url":"https://www.ijcai.org/proceedings/2020/0655.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3041122072.pdf","grobid_xml":"https://content.openalex.org/works/W3041122072.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1489992655","https://openalex.org/W1647671624","https://openalex.org/W1993977866","https://openalex.org/W2070996757","https://openalex.org/W2097979891","https://openalex.org/W2124673015","https://openalex.org/W2158420658","https://openalex.org/W2394815666","https://openalex.org/W2523023877","https://openalex.org/W2622701666","https://openalex.org/W2923104132","https://openalex.org/W3124908524"],"related_works":["https://openalex.org/W2566658409","https://openalex.org/W3119324922","https://openalex.org/W2352686120","https://openalex.org/W2372594123","https://openalex.org/W2358310581","https://openalex.org/W2026132847","https://openalex.org/W4385695127","https://openalex.org/W2137810919","https://openalex.org/W2964752624","https://openalex.org/W4255854114"],"abstract_inverted_index":{"A":[0],"Linked":[1,37,101],"Data":[2,38,102],"crawler":[3],"performs":[4],"a":[5,27,109,137],"selection":[6],"to":[7,41,48,71,96,115,158],"focus":[8],"on":[9,16,103],"collecting":[10],"linked":[11],"RDF":[12,52,117],"(including":[13],"RDFa)":[14],"data":[15,53,118],"the":[17,20,33,61,77,84,104,124,129,132],"Web.":[18],"From":[19],"perspectives":[21],"of":[22,36,86,140],"throughput":[23],"and":[24,30,55,93,131],"coverage,":[25],"given":[26],"newly":[28,110],"discovered":[29,111],"targeted":[31],"URI,":[32],"key":[34],"issue":[35],"crawlers":[39,154],"is":[40,46,58],"decide":[42],"whether":[43,108],"this":[44,82,89],"URI":[45,112],"likely":[47],"dereference":[49],"into":[50],"an":[51,116],"source":[54,119],"therefore":[56],"it":[57,63],"worth":[59],"downloading":[60],"representation":[62],"points":[64],"to.":[65],"Current":[66],"solutions":[67],"adopt":[68],"heuristic":[69],"rules":[70],"filter":[72],"irrelevant":[73],"URIs.":[74],"But":[75],"when":[76],"heuristics":[78],"are":[79],"too":[80],"restrictive":[81],"hampers":[83],"coverage":[85],"crawling.":[87],"In":[88],"paper,":[90],"we":[91,134],"propose":[92],"compare":[94,146],"approaches":[95],"learn":[97],"strategies":[98],"for":[99],"crawling":[100],"Web":[105],"by":[106],"predicting":[107,128],"will":[113],"lead":[114],"or":[120],"not.":[121],"We":[122,145],"detail":[123],"features":[125],"used":[126],"in":[127],"relevance":[130],"methods":[133,157],"evaluated":[135],"including":[136,152],"promising":[138],"adaptation":[139],"FTRL-proximal":[141],"online":[142],"learning":[143],"algorithm.":[144],"several":[147],"options":[148],"through":[149],"extensive":[150],"experiments":[151],"existing":[153],"as":[155],"baseline":[156],"evaluate":[159],"their":[160],"efficiency.":[161]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2020-07-16T00:00:00"}
