{"id":"https://openalex.org/W7140112598","doi":"https://doi.org/10.18653/v1/2026.eacl-long.154","title":"How Much Pretraining Does Structured Data Need?","display_name":"How Much Pretraining Does Structured Data Need?","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140112598","doi":"https://doi.org/10.18653/v1/2026.eacl-long.154"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.eacl-long.154","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.154","pdf_url":"https://aclanthology.org/2026.eacl-long.154.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.eacl-long.154.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126342927","display_name":"Daniel Fadlon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel Fadlon","raw_affiliation_strings":["School of Computer Science , Reichman University , Herzliya , Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science , Reichman University , Herzliya , Israel","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130330088","display_name":"Kfir Bar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kfir Bar","raw_affiliation_strings":["School of Computer Science , Reichman University , Herzliya , Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science , Reichman University , Herzliya , Israel","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40270627,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3352","last_page":"3365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13405","display_name":"Educational Assessment and Improvement","score":0.04170000180602074,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13405","display_name":"Educational Assessment and Improvement","score":0.04170000180602074,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11875","display_name":"Statistics Education and Methodologies","score":0.03009999915957451,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14201","display_name":"Data Analysis and Archiving","score":0.023800000548362732,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.7937999963760376},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.7088000178337097},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6359000205993652},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.38510000705718994},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3815999925136566},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.3538999855518341}],"concepts":[{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.7937999963760376},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.7088000178337097},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7046999931335449},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6359000205993652},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5486000180244446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5245000123977661},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4074000120162964},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.38510000705718994},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3815999925136566},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C40077939","wikidata":"https://www.wikidata.org/wiki/Q2336004","display_name":"Semi-structured data","level":3,"score":0.29350000619888306},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C177877439","wikidata":"https://www.wikidata.org/wiki/Q7604413","display_name":"Statistical relational learning","level":3,"score":0.2720000147819519}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2026.eacl-long.154","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.154","pdf_url":"https://aclanthology.org/2026.eacl-long.154.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.eacl-long.154","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.154","pdf_url":"https://aclanthology.org/2026.eacl-long.154.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140112598.pdf","grobid_xml":"https://content.openalex.org/works/W7140112598.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4],"increasingly":[5],"adopted":[6],"for":[7,73],"handling":[8],"structured":[9,39,62,74],"data,":[10,75],"including":[11],"tabular":[12],"and":[13,64],"relational":[14],"inputs,":[15],"despite":[16],"mostly":[17],"being":[18],"pretrained":[19,30,77,98],"on":[20,61],"unstructured":[21,68,91],"text.This":[22],"raises":[23],"a":[24],"key":[25],"question:":[26],"how":[27],"effectively":[28],"do":[29],"representations":[31],"from":[32,96],"languagefocused":[33],"LLMs":[34],"transfer":[35],"to":[36,67],"tasks":[37,92],"involving":[38],"inputs?We":[40],"address":[41],"this":[42],"through":[43],"controlled":[44],"experiments":[45],"using":[46],"two":[47],"small":[48],"opensource":[49],"LLMs,":[50],"systematically":[51],"re-initializing":[52],"subsets":[53],"of":[54],"layers":[55],"with":[56,81,106],"random":[57],"weights":[58],"before":[59],"finetuning":[60],"datasets":[63],"comparing":[65],"results":[66],"datasets.Our":[69],"analyses":[70],"show":[71],"that,":[72],"most":[76],"depth":[78],"contributes":[79],"little,":[80],"performance":[82],"often":[83],"saturating":[84],"after":[85],"the":[86],"first":[87],"few":[88],"layers,":[89],"whereas":[90],"benefit":[93],"more":[94,111],"consistently":[95],"deeper":[97],"representations.Pretraining":[99],"remains":[100],"useful":[101],"mainly":[102],"in":[103],"low-resource":[104],"settings,":[105],"its":[107],"impact":[108],"diminishing":[109],"as":[110],"training":[112],"data":[113],"becomes":[114],"available.":[115]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
