{"id":"https://openalex.org/W4367046714","doi":"https://doi.org/10.1145/3543507.3583236","title":"Wikidata as a seed for Web Extraction","display_name":"Wikidata as a seed for Web Extraction","publication_year":2023,"publication_date":"2023-04-26","ids":{"openalex":"https://openalex.org/W4367046714","doi":"https://doi.org/10.1145/3543507.3583236"},"language":"en","primary_location":{"id":"doi:10.1145/3543507.3583236","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543507.3583236","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036029871","display_name":"Kunpeng Guo","orcid":"https://orcid.org/0000-0002-0692-0057"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I4405259976","https://openalex.org/I59692284"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Kunpeng Guo","raw_affiliation_strings":["The QA Company SAS, France and Laboratoire Hubert Curien, UMR CNRS 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"The QA Company SAS, France and Laboratoire Hubert Curien, UMR CNRS 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I86767153","https://openalex.org/I1294671590","https://openalex.org/I4210085887"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073664557","display_name":"Dennis Diefenbach","orcid":"https://orcid.org/0000-0002-0046-2219"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I4405259976","https://openalex.org/I59692284"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Dennis Diefenbach","raw_affiliation_strings":["The QA Company SAS, France and Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"The QA Company SAS, France and Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I86767153","https://openalex.org/I4210085887"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047732112","display_name":"Antoine Gourru","orcid":"https://orcid.org/0000-0003-3571-2430"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I4405259976","https://openalex.org/I59692284"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine Gourru","raw_affiliation_strings":["Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I86767153","https://openalex.org/I4210085887"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082775286","display_name":"Christophe Gravier","orcid":"https://orcid.org/0000-0001-8586-6302"},"institutions":[{"id":"https://openalex.org/I4210085887","display_name":"Laboratoire Hubert Curien","ror":"https://ror.org/0028p8r67","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I203339264","https://openalex.org/I277688954","https://openalex.org/I4210085887","https://openalex.org/I4210091746","https://openalex.org/I4210095849","https://openalex.org/I4405259976","https://openalex.org/I59692284"]},{"id":"https://openalex.org/I86767153","display_name":"Universit\u00e9 Jean Monnet","ror":"https://ror.org/04yznqr36","country_code":"FR","type":"education","lineage":["https://openalex.org/I86767153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Christophe Gravier","raw_affiliation_strings":["Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Hubert Curien UMR 5516, Universit\u00e9 Jean Monnet, France","institution_ids":["https://openalex.org/I86767153","https://openalex.org/I4210085887"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036029871"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210085887","https://openalex.org/I86767153"],"apc_list":null,"apc_paid":null,"fwci":0.8641,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.77993794,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2402","last_page":"2411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8668602108955383},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5632541179656982},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5364845991134644},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5319079756736755},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.518261194229126},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.4831652045249939},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.4562133848667145},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4316033720970154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8668602108955383},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5632541179656982},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5364845991134644},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5319079756736755},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.518261194229126},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.4831652045249939},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.4562133848667145},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4316033720970154},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3543507.3583236","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543507.3583236","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2023","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04428911v1","is_oa":false,"landing_page_url":"https://hal.science/hal-04428911","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WWW '23: The ACM Web Conference 2023, Apr 2023, Austin TX USA, United States. pp.2402-2411, &#x27E8;10.1145/3543507.3583236&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W2000411838","https://openalex.org/W2016753842","https://openalex.org/W2080133951","https://openalex.org/W2094728533","https://openalex.org/W2104583100","https://openalex.org/W2946693801","https://openalex.org/W2964072618","https://openalex.org/W3091620232","https://openalex.org/W4285820265"],"related_works":["https://openalex.org/W2611741382","https://openalex.org/W3135843367","https://openalex.org/W2613685774","https://openalex.org/W3112355890","https://openalex.org/W4220741973","https://openalex.org/W36911888","https://openalex.org/W4385731361","https://openalex.org/W4383535523","https://openalex.org/W3006227201","https://openalex.org/W4385489363"],"abstract_inverted_index":{"Wikidata":[0,131,194,225,288],"has":[1],"grown":[2],"to":[3,109,148,157,174,232,274,283],"a":[4,76,104,215,230,245],"knowledge":[5,77,289],"graph":[6,78],"with":[7],"an":[8],"impressive":[9],"size.":[10],"To":[11],"date,":[12],"it":[13],"contains":[14],"more":[15,60],"than":[16],"17":[17],"billion":[18],"triples":[19],"collecting":[20],"information":[21,39,190],"about":[22],"people,":[23],"places,":[24],"films,":[25],"stars,":[26],"publications,":[27],"proteins,":[28],"and":[29,57,67,71,88,97,111,154,209,220,281],"many":[30],"more.":[31],"On":[32],"the":[33,38,41,91,189,195,202,236,284,287],"other":[34],"side,":[35],"most":[36],"of":[37,90,218,248,262,286],"on":[40,137,235],"Web":[42,120,161,185],"is":[43,79,107,135,273],"not":[44,177],"published":[45,117],"in":[46,62,75,193,277],"highly":[47],"structured":[48],"data":[49,74,92],"repositories":[50],"like":[51],"Wikidata,":[52],"but":[53,182],"rather":[54],"as":[55,229],"unstructured":[56],"semi-structured":[58],"content,":[59],"concretely":[61],"HTML":[63],"pages":[64],"containing":[65],"text":[66],"tables.":[68],"Finding,":[69],"monitoring,":[70],"organizing":[72],"this":[73,94,100,223],"requiring":[80],"considerable":[81],"work":[82],"from":[83,143,151,160,179,184],"human":[84,269],"editors.":[85,132],"The":[86,133,271],"volume":[87],"complexity":[89],"make":[93],"task":[95],"difficult":[96],"time-consuming.":[98],"In":[99],"work,":[101],"we":[102,166,242,257],"present":[103],"framework":[105,134,197],"that":[106,115,123,145,168,241,256,264],"able":[108],"identify":[110],"extract":[112,149,158,175,211,233,260],"new":[113,212],"facts":[114,150,159,176,213,234,263],"are":[116,146],"under":[118],"multiple":[119],"domains":[121],"so":[122],"they":[124],"can":[125,171,198,210,226,243,258,265],"be":[126,172,199,227,266],"proposed":[127,196,267],"for":[128,204,214,268],"validation":[129],"by":[130],"relying":[136],"question-answering":[138],"technologies.":[139],"We":[140],"take":[141],"inspiration":[142],"ideas":[144],"used":[147,228],"textual":[152,180],"collections":[153,181],"adapt":[155],"them":[156],"pages.":[162,186],"For":[163],"achieving":[164],"this,":[165],"demonstrate":[167],"language":[169],"models":[170],"adapted":[173],"only":[178],"also":[183],"By":[187],"exploiting":[188],"already":[191],"contained":[192],"trained":[200],"without":[201],"need":[203],"any":[205],"additional":[206],"learning":[207],"signals":[208],"wide":[216],"range":[217],"properties":[219],"domains.":[221],"Following":[222],"path,":[224],"seed":[231],"Web.":[237],"Our":[238],"experiments":[239],"show":[240,255],"achieve":[244],"mean":[246],"performance":[247],"84.07":[249],"at":[250],"F1-score.":[251],"Moreover,":[252],"our":[253],"estimations":[254],"potentially":[259],"millions":[261],"validation.":[270],"goal":[272],"help":[275],"editors":[276],"their":[278],"daily":[279],"tasks":[280],"contribute":[282],"completion":[285],"graph.":[290]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
