{"id":"https://openalex.org/W4393481327","doi":"https://doi.org/10.5281/zenodo.8222854","title":"Benchmark for Pairs of Papers in Semantic Scholar: 1 hop vs. 2-4 hops version 0.0","display_name":"Benchmark for Pairs of Papers in Semantic Scholar: 1 hop vs. 2-4 hops version 0.0","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4393481327","doi":"https://doi.org/10.5281/zenodo.8222854"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:8222854","is_oa":true,"landing_page_url":"https://zenodo.org/record/8222854","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/8222854","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016543371","display_name":"Kenneth Church","orcid":"https://orcid.org/0000-0001-8378-6069"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Church, Kenneth","raw_affiliation_strings":["Northeastern University"],"raw_orcid":"https://orcid.org/0000-0001-8378-6069","affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021414027","display_name":"Peter Vickers","orcid":"https://orcid.org/0000-0003-0479-7363"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vickers, Peter","raw_affiliation_strings":["Sheffield University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sheffield University","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006038779","display_name":"Rodolfo Zevallos","orcid":"https://orcid.org/0000-0003-0192-7740"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Zevallos, Rodolfo","raw_affiliation_strings":["Universitat Pompeu Fabra"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5016543371"],"corresponding_institution_ids":["https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.8519999980926514,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.8519999980926514,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hop","display_name":"Hop (telecommunications)","score":0.6225516200065613},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5986495018005371},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5363532304763794},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4100296199321747},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32707756757736206},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.17789414525032043},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.11037793755531311},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.10854282975196838}],"concepts":[{"id":"https://openalex.org/C25906391","wikidata":"https://www.wikidata.org/wiki/Q1432381","display_name":"Hop (telecommunications)","level":2,"score":0.6225516200065613},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5986495018005371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5363532304763794},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4100296199321747},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32707756757736206},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.17789414525032043},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11037793755531311},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.10854282975196838}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:8222854","is_oa":true,"landing_page_url":"https://zenodo.org/record/8222854","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.8222854","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.8222854","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:8222854","is_oa":true,"landing_page_url":"https://zenodo.org/record/8222854","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526"],"abstract_inverted_index":{"<strong>Benchmark":[0],"for":[1,104,234],"Pairs":[2],"of":[3,58,182,190,237],"Papers":[4,72],"in":[5,47,89,96,211,223,258],"Semantic":[6,36],"Scholar:":[7],"1":[8,39,112,132,137,147,187],"hop":[9],"vs.":[10],"2-4":[11],"hops":[12],"(version":[13],"0.0)</strong>":[14],"There":[15,78],"are":[16,32,55,73,79,254],"two":[17,45,60],"files:":[18],"valid.txt":[19,111],"and":[20,30,53,70,92,215,239],"test.txt;":[21],"both":[22],"files":[23],"use":[24],"the":[25,41,44,48,56,59,86,93,176],"same":[26],"format.":[27],"Columns":[28,51],"2":[29,117,142,152],"3":[31,122,157],"corpus":[33],"ids":[34],"from":[35,188],"Scholar.":[37],"Column":[38],"is":[40,65,101,164,178,243],"distance":[42,185,193],"between":[43,68,213],"papers":[46,82,88,95,105,183,191],"citation":[49],"index.":[50],"4":[52,127,146],"5":[54],"bins":[57,212,225],"paper,":[61],"respectively.":[62],"The":[63,252],"bin":[64,90,97],"a":[66,102,167,207,259],"number":[67],"0":[69,136,201,214,226],"100.":[71],"sorted":[74],"by":[75],"publication":[76,108],"date.":[77],"about":[80],"2M":[81],"per":[83],"bin,":[84,168],"with":[85,106,184,192],"oldest":[87],"0,":[91],"newest":[94],"99.":[98,230],"Bin":[99],"100":[100],"catch-all":[103],"unknown":[107],"dates.":[109],"head":[110],"248518397":[113,118,123,128],"1041744":[114],"97":[115,120,125,130],"51":[116],"23848439":[119],"21":[121],"4235810":[124],"12":[126],"82079949":[129],"11":[131],"3374228":[133],"140728989":[134],"79":[135],"68334187":[138,143],"36144275":[139],"58":[140,145],"34":[141],"7008060":[144],"205881482":[148,153,158],"94036919":[149],"77":[150,155,160],"72":[151],"95069173":[154],"53":[156],"53480264":[159],"52":[161],"Each":[162],"row":[163],"assigned":[165],"to":[166,179],"B,":[169],"where":[170],"B":[171],"=":[172],"max(col4,":[173],"col5).":[174],"<strong>Task</strong>:":[175],"task":[177],"distinguish":[180],"pairs":[181,189],"==":[186,247,250],"&gt;":[194],"1.":[195],"<strong>Test/Train":[196],"splits</strong>:":[197],"For":[198],"all":[199,224,235],"thresholds,":[200],"&lt;=":[202,204,227,229],"T_{train}":[203,216,238],"99,":[205],"train":[206],"model":[208],"on":[209,221],"rows":[210,222,257],"(inclusively).":[217],"Test":[218],"these":[219],"models":[220],"T_{test}":[228],"Report":[231],"average":[232],"accuracy":[233],"combinations":[236],"T_{test}.":[240],"Average":[241],"Accuracy":[242],"defined":[244],"as:":[245],"mean(Predict(row)":[246],"1,":[248],"Gold(row)":[249],"1)":[251],"means":[253],"computed":[255],"over":[256],"test":[260],"bin.":[261]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
