{"id":"https://openalex.org/W4381328679","doi":"https://doi.org/10.1145/3589786","title":"Steered Training Data Generation for Learned Semantic Type Detection","display_name":"Steered Training Data Generation for Learned Semantic Type Detection","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4381328679","doi":"https://doi.org/10.1145/3589786"},"language":"en","primary_location":{"id":"doi:10.1145/3589786","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589786","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084475721","display_name":"Sven Langenecker","orcid":"https://orcid.org/0009-0002-2809-5331"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sven Langenecker","raw_affiliation_strings":["L\u00c4PPLE AG; DHBW Mosbach; &amp; Technical University of Darmstadt, Heilbronn, Germany","DHBW Mosbach"],"affiliations":[{"raw_affiliation_string":"L\u00c4PPLE AG; DHBW Mosbach; &amp; Technical University of Darmstadt, Heilbronn, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"DHBW Mosbach","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003359637","display_name":"Christoph Sturm","orcid":"https://orcid.org/0009-0008-5706-3041"},"institutions":[{"id":"https://openalex.org/I4210102060","display_name":"Ansbach University of Applied Sciences","ror":"https://ror.org/0167rnj42","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210102060"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christoph Sturm","raw_affiliation_strings":["DHBW Mosbach, Mosbach, Germany"],"affiliations":[{"raw_affiliation_string":"DHBW Mosbach, Mosbach, Germany","institution_ids":["https://openalex.org/I4210102060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022339518","display_name":"Christian Schalles","orcid":"https://orcid.org/0009-0005-7036-3012"},"institutions":[{"id":"https://openalex.org/I4210102060","display_name":"Ansbach University of Applied Sciences","ror":"https://ror.org/0167rnj42","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210102060"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Schalles Schalles","raw_affiliation_strings":["DHBW Mosbach, Mosbach, Germany"],"affiliations":[{"raw_affiliation_string":"DHBW Mosbach, Mosbach, Germany","institution_ids":["https://openalex.org/I4210102060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073504200","display_name":"Carsten Binnig","orcid":"https://orcid.org/0000-0002-2744-7836"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Carsten Binnig","raw_affiliation_strings":["Technical University of Darmstadt &amp; DFKI, Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt &amp; DFKI, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084475721"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5326948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"1","issue":"2","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.796554684638977},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6739892959594727},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.6116397976875305},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5846940875053406},{"id":"https://openalex.org/keywords/data-type","display_name":"Data type","score":0.5693541765213013},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5591115951538086},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4746991991996765},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.4631556570529938},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.45957088470458984},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4438028633594513},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4142683744430542},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3385816216468811},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08756238222122192}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.796554684638977},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6739892959594727},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.6116397976875305},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5846940875053406},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.5693541765213013},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5591115951538086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4746991991996765},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4631556570529938},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.45957088470458984},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4438028633594513},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4142683744430542},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3385816216468811},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08756238222122192},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3589786","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589786","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1995875735","https://openalex.org/W2398606196","https://openalex.org/W2575168421","https://openalex.org/W2769041395","https://openalex.org/W2898796029","https://openalex.org/W2911424454","https://openalex.org/W2951621897","https://openalex.org/W2959716049","https://openalex.org/W2966329817","https://openalex.org/W3002424897","https://openalex.org/W3033527224","https://openalex.org/W3082424964","https://openalex.org/W3165753548","https://openalex.org/W3168052339","https://openalex.org/W4205922070","https://openalex.org/W4312782109","https://openalex.org/W6600135713"],"related_works":["https://openalex.org/W4317548404","https://openalex.org/W3022007134","https://openalex.org/W2949671220","https://openalex.org/W2130553454","https://openalex.org/W2033364610","https://openalex.org/W2797776314","https://openalex.org/W3163689946","https://openalex.org/W2153927146","https://openalex.org/W3104108945","https://openalex.org/W4390190783"],"abstract_inverted_index":{"In":[0],"this":[1,71],"paper,":[2],"we":[3,98],"introduce":[4],"STEER":[5,19,43,75],"to":[6,13,31,78],"adapt":[7],"learned":[8,81,109],"semantic":[9,26,82],"type":[10,83],"extraction":[11,84],"approaches":[12],"a":[14,21,46],"new,":[15],"unseen":[16],"data":[17,22,36,49,60,74,93,113],"lake.":[18],"provides":[20],"programming":[23],"framework":[24],"for":[25,63,67],"labeling":[27],"which":[28],"is":[29,76],"used":[30],"generate":[32,56],"new":[33],"labeled":[34],"training":[35,48,59,73],"with":[37,45],"minimal":[38],"overhead.":[39],"At":[40],"its":[41],"core,":[42],"comes":[44],"novel":[47],"generation":[50],"procedure":[51],"called":[52],"Steered-Labeling":[53],"that":[54,97],"can":[55,99],"high":[57],"quality":[58],"not":[61],"only":[62],"non-numeric":[64],"but":[65],"also":[66],"numerical":[68],"columns.":[69],"With":[70],"generated":[72],"able":[77],"fine-tune":[79],"existing":[80],"models.":[85],"We":[86],"evaluate":[87],"our":[88],"approach":[89],"on":[90],"four":[91],"different":[92,106],"lakes":[94],"and":[95],"show":[96],"significantly":[100],"improve":[101],"the":[102],"performance":[103],"of":[104,108],"two":[105],"types":[107],"models":[110],"across":[111],"all":[112],"lakes.":[114]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
