{"id":"https://openalex.org/W4416034011","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.973","title":"Problem Solved? Information Extraction Design Space for Layout-Rich Documents using LLMs","display_name":"Problem Solved? Information Extraction Design Space for Layout-Rich Documents using LLMs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416034011","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.973"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.973","is_oa":false,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/11475/35284","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119913439","display_name":"Gaye Colakoglu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gaye Colakoglu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050770083","display_name":"G\u00fcrkan Solmaz","orcid":"https://orcid.org/0000-0002-5341-7913"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G\u00fcrkan Solmaz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5010965443","display_name":"Jonathan F\u00fcrst","orcid":"https://orcid.org/0000-0002-4062-1827"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jonathan F\u00fcrst","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.0768,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.97049959,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"17908","last_page":"17927"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.3154999911785126,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.3154999911785126,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.07909999787807465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.066600002348423,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5004000067710876},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.42080000042915344},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.30730000138282776},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.28139999508857727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5986999869346619},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5004000067710876},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3675999939441681},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3587000072002411},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34389999508857727},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.24690000712871552}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.973","is_oa":false,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:digitalcollection.zhaw.ch:11475/35284","is_oa":true,"landing_page_url":"https://hdl.handle.net/11475/35284","pdf_url":null,"source":{"id":"https://openalex.org/S4306401810","display_name":"Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften digital collection (Zurich University of Applied Sciences)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200744771","host_organization_name":"ZHAW Zurich University of Applied Sciences","host_organization_lineage":["https://openalex.org/I200744771"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:digitalcollection.zhaw.ch:11475/35284","is_oa":true,"landing_page_url":"https://hdl.handle.net/11475/35284","pdf_url":null,"source":{"id":"https://openalex.org/S4306401810","display_name":"Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften digital collection (Zurich University of Applied Sciences)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200744771","host_organization_name":"ZHAW Zurich University of Applied Sciences","host_organization_lineage":["https://openalex.org/I200744771"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"defines":[2],"and":[3,36,45,60],"explores":[4],"the":[5,43,65,98,105,122,148,158,170],"design":[6,69],"space":[7],"for":[8],"information":[9],"extraction":[10],"(IE)":[11],"from":[12],"layout-rich":[13],"documents":[14],"using":[15,121],"large":[16],"language":[17],"models":[18],"(LLMs).":[19],"The":[20,86],"three":[21],"core":[22,49],"challenges":[23],"of":[24,58,67,97,157,172],"layout-aware":[25,76],"IE":[26,77,84,90,99],"with":[27,109,153],"LLMs":[28,94,168],"are":[29],"1)":[30],"data":[31],"structuring,":[32],"2)":[33],"model":[34],"engagement,":[35],"3)":[37],"output":[38],"refinement.":[39],"Our":[40,140,180],"study":[41],"investigates":[42],"sub-problems":[44],"methods":[46],"within":[47],"these":[48],"challenges,":[50],"such":[51],"as":[52],"input":[53],"representation,":[54],"chunking,":[55],"prompting,":[56],"selection":[57],"LLMs,":[59],"multimodal":[61],"models.":[62,85],"It":[63],"examines":[64],"effect":[66],"different":[68],"choices":[70],"through":[71],"LayIE-LLM,":[72],"a":[73,117,127,132,154,176],"new,":[74],"open-source,":[75],"test":[78],"suite,":[79],"benchmarking":[80],"against":[81],"traditional,":[82],"fine-tuned":[83],"results":[87],"on":[88],"two":[89],"datasets":[91],"show":[92],"that":[93,136],"require":[95],"adjustment":[96],"pipeline":[100],"to":[101],"achieve":[102],"competitive":[103],"performance:":[104],"optimized":[106],"configuration":[107,120],"found":[108],"LayIE-LLM":[110],"achieves":[111,137],"13.3\u201337.5":[112],"F1":[113],"points":[114,145],"more":[115],"than":[116,147],"general-practice":[118],"baseline":[119],"same":[123],"LLM.":[124],"To":[125],"find":[126],"well-working":[128],"configuration,":[129],"we":[130,162],"develop":[131],"one-factor-at-a-time":[133],"(OFAT)":[134],"method":[135,141],"near-optimal":[138],"results.":[139],"is":[142,182],"only":[143],"0.8\u20131.8":[144],"lower":[146],"best":[149],"full":[150],"factorial":[151],"exploration":[152],"fraction":[155],"(~2.8%)":[156],"required":[159],"computation.":[160],"Overall,":[161],"demonstrate":[163],"that,":[164],"if":[165],"well-configured,":[166],"general-purpose":[167],"match":[169],"performance":[171],"specialized":[173],"models,":[174],"providing":[175],"cost-effective,":[177],"finetuning-free":[178],"alternative.":[179],"test-suite":[181],"available":[183],"at":[184],"https://github.com/gayecolakoglu/LayIE-LLM":[185]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
