{"id":"https://openalex.org/W3147162460","doi":"https://doi.org/10.18420/btw2021-17","title":"Towards Learned Metadata Extraction for Data Lakes","display_name":"Towards Learned Metadata Extraction for Data Lakes","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3147162460","doi":"https://doi.org/10.18420/btw2021-17","mag":"3147162460"},"language":"en","primary_location":{"id":"doi:10.18420/btw2021-17","is_oa":true,"landing_page_url":"https://doi.org/10.18420/btw2021-17","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.18420/btw2021-17","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084475721","display_name":"Sven Langenecker","orcid":"https://orcid.org/0009-0002-2809-5331"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Langenecker, Sven","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003359637","display_name":"Christoph Sturm","orcid":"https://orcid.org/0009-0008-5706-3041"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sturm, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022339518","display_name":"Christian Schalles","orcid":"https://orcid.org/0009-0005-7036-3012"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schalles, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5073504200","display_name":"Carsten Binnig","orcid":"https://orcid.org/0000-0002-2744-7836"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Binnig, Carsten","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084475721"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2238,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.82824356,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7625349760055542},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5724613666534424},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4937685430049896},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35614943504333496},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3450382649898529},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2893858253955841}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7625349760055542},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5724613666534424},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4937685430049896},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35614943504333496},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3450382649898529},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2893858253955841},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18420/btw2021-17","is_oa":true,"landing_page_url":"https://doi.org/10.18420/btw2021-17","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18420/btw2021-17","is_oa":true,"landing_page_url":"https://doi.org/10.18420/btw2021-17","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3213435853","https://openalex.org/W3155217628","https://openalex.org/W2499582922","https://openalex.org/W2613704683","https://openalex.org/W2024018932","https://openalex.org/W3119631386","https://openalex.org/W1604098019","https://openalex.org/W843410015","https://openalex.org/W2164259917","https://openalex.org/W2293050121","https://openalex.org/W1989592829","https://openalex.org/W118804835","https://openalex.org/W2790000818"],"abstract_inverted_index":{"An":[0],"important":[1],"task":[2],"for":[3,37,127],"enabling":[4],"the":[5,22,30,92,131,134,141,207],"efficient":[6],"exploration":[7],"of":[8,33,52,94,130,133,144,183,190,209],"available":[9,23,154],"data":[10,13,24,43,72,75,110,125,136,158,166,169,200,218],"in":[11,73,161],"a":[12,88,95,108,175,180],"lake":[14],"is":[15,59,119],"to":[16,21,28,69,82,122,196,205],"annotate":[17],"semantic":[18,124,211],"type":[19,212],"information":[20],"sources.":[25],"In":[26,77,112],"order":[27],"reduce":[29],"manual":[31,203],"overhead":[32],"annotation,":[34],"learned":[35,54,210],"approaches":[36,55,66,147,214],"automatic":[38],"metadata":[39],"extraction":[40,213],"on":[41,104,153,215],"structured":[42],"sources":[44],"have":[45],"been":[46],"proposed":[47],"recently.":[48],"While":[49],"initial":[50,192],"results":[51,139,189],"these":[53,65],"seem":[56],"promising,":[57],"it":[58],"still":[60],"not":[61],"clear":[62],"how":[63],"well":[64],"can":[67],"generalize":[68],"new":[70,181,216],"unseen":[71,217],"real-world":[74,109,135],"lakes.":[76],"this":[78,84],"paper,":[79],"we":[80,115,178,194],"aim":[81],"tackle":[83],"question":[85],"and":[86,156,167,187],"as":[87,174],"first":[89],"contribution":[90,177],"show":[91,116,140],"result":[93],"study":[96,114],"when":[97],"applying":[98],"Sato":[99,118],"-a":[100],"recent":[101],"approach":[102],"based":[103],"deep":[105,145],"learning":[106,146],"-to":[107],"set.":[111,137],"our":[113],"that":[117],"only":[120],"able":[121],"extract":[123],"types":[126],"about":[128],"10%":[129],"columns":[132],"These":[138],"general":[142],"limitation":[143],"which":[148],"often":[149,170],"provide":[150],"near-perfect":[151],"performance":[152,208],"training":[155,165,199],"testing":[157],"but":[159],"fail":[160],"real":[162,168],"settings":[163],"since":[164],"strongly":[171],"vary.":[172],"Hence,":[173],"second":[176],"propose":[179],"direction":[182],"using":[184],"weak":[185],"supervision":[186],"present":[188],"an":[191],"prototype":[193],"built":[195],"generate":[197],"labeled":[198],"with":[201],"low":[202],"efforts":[204],"improve":[206],"sets.":[219]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
