{"id":"https://openalex.org/W2777092996","doi":"https://doi.org/10.1145/3148011.3154470","title":"A Supervised Learning Approach To Entity Matching Between Scholarly Big Datasets","display_name":"A Supervised Learning Approach To Entity Matching Between Scholarly Big Datasets","publication_year":2017,"publication_date":"2017-12-04","ids":{"openalex":"https://openalex.org/W2777092996","doi":"https://doi.org/10.1145/3148011.3154470","mag":"2777092996"},"language":"en","primary_location":{"id":"doi:10.1145/3148011.3154470","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3148011.3154470","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Knowledge Capture Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075242841","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0003-0173-4463"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jian Wu","raw_affiliation_strings":["IST, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"IST, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072607871","display_name":"Athar Sefid","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Athar Sefid","raw_affiliation_strings":["CSE, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"CSE, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086240210","display_name":"Allen C. Ge","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Allen C. Ge","raw_affiliation_strings":["IST, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"IST, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["IST, Pennsylvania State University, University Park, PA, USA and CSE, Pennsylvania State University, University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"IST, Pennsylvania State University, University Park, PA, USA and CSE, Pennsylvania State University, University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075242841"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":1.2916,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83295561,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8510667085647583},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8092586994171143},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6577039957046509},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5710283517837524},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5091983675956726},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.47852882742881775},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.46138671040534973},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.45504772663116455},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3993716537952423},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3876059055328369},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.306923508644104},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17516669631004333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8510667085647583},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8092586994171143},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6577039957046509},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5710283517837524},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5091983675956726},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.47852882742881775},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.46138671040534973},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.45504772663116455},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3993716537952423},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3876059055328369},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.306923508644104},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17516669631004333},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3148011.3154470","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3148011.3154470","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Knowledge Capture Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1547556487","https://openalex.org/W1547612978","https://openalex.org/W1603719052","https://openalex.org/W2011480553","https://openalex.org/W2012833704","https://openalex.org/W2029020160","https://openalex.org/W2042641525","https://openalex.org/W2080099271","https://openalex.org/W2098564132","https://openalex.org/W2140479099","https://openalex.org/W2168065722","https://openalex.org/W2253675773","https://openalex.org/W2295508865","https://openalex.org/W2411309731","https://openalex.org/W2520300089","https://openalex.org/W2547426272","https://openalex.org/W2574051726","https://openalex.org/W2592249290","https://openalex.org/W2615071493","https://openalex.org/W2724671831","https://openalex.org/W3105274952","https://openalex.org/W4241886172"],"related_works":["https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2104269053","https://openalex.org/W2106424170","https://openalex.org/W1985426483","https://openalex.org/W2501188010","https://openalex.org/W4299935056","https://openalex.org/W2145752793"],"abstract_inverted_index":{"Bibliography":[0],"metadata":[1,35,50,87],"in":[2,7,72,88],"scientific":[3],"documents":[4,32],"are":[5,41],"essential":[6],"indexing":[8],"and":[9,19,112,127],"retrieval":[10,141],"of":[11,31,97,110,119,136],"scholarly":[12],"big":[13],"data":[14],"for":[15],"production":[16],"search":[17,26],"engines":[18,27],"bibliometrics":[20],"research":[21],"studies.":[22],"Crawl-based":[23],"digital":[24],"library":[25],"can":[28,81],"harvest":[29],"millions":[30],"efficiently":[33],"but":[34],"information":[36,140],"extracted":[37,99],"by":[38,145],"automatic":[39,104],"extractors":[40],"often":[42],"noisy,":[43],"incomplete,":[44],"and/or":[45],"with":[46,134,149],"parsing":[47],"errors.":[48],"These":[49],"could":[51],"be":[52,83],"cleaned":[53],"given":[54],"a":[55,63,73,77,95],"reference":[56,78],"database.":[57,91],"In":[58],"this":[59],"work,":[60],"we":[61],"develop":[62],"supervised":[64],"machine":[65],"learning":[66],"based":[67,143],"approach":[68,93],"to":[69,76,85],"match":[70],"entities":[71],"target":[74,90],"database":[75],"database,":[79],"which":[80],"further":[82],"used":[84],"clean":[86],"the":[89,116],"The":[92],"leverages":[94],"number":[96],"features":[98],"from":[100,103],"headers":[101],"available":[102],"extraction":[105],"results.":[106],"By":[107],"adjusting":[108],"combinations":[109],"hyper-parameters":[111],"various":[113],"sampling":[114],"strategies,":[115],"best":[117],"results":[118],"Support":[120],"Vector":[121],"Machines,":[122],"Logistic":[123],"Regression,":[124],"Random":[125],"Forests,":[126],"Na\u00efve":[128],"Bayes":[129],"models":[130],"give":[131],"comparable":[132],"results,":[133],"F1-measure":[135],"about":[137,146],"90%,":[138],"outperforming":[139],"only":[142],"method":[144],"14%,":[147],"evaluated":[148],"cross":[150],"validation.":[151]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
