{"id":"https://openalex.org/W4414241920","doi":"https://doi.org/10.1145/3768156","title":"Large Language Models in Document Intelligence: A Comprehensive Survey, Recent Advances, Challenges, and Future Trends","display_name":"Large Language Models in Document Intelligence: A Comprehensive Survey, Recent Advances, Challenges, and Future Trends","publication_year":2025,"publication_date":"2025-09-16","ids":{"openalex":"https://openalex.org/W4414241920","doi":"https://doi.org/10.1145/3768156"},"language":"en","primary_location":{"id":"doi:10.1145/3768156","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3768156","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102753677","display_name":"Wenjun Ke","orcid":"https://orcid.org/0000-0001-7352-1710"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenjun Ke","raw_affiliation_strings":["School of Computer Science and Engineering, Southeast University, Nanjing, China and Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, Nanjing, China","School of Computer Science and Engineering, Southeast University, China and Key Laboratory of New Generation Artifcial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, China"],"raw_orcid":"https://orcid.org/0000-0001-7352-1710","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, China and Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, China and Key Laboratory of New Generation Artifcial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yifan Zheng","orcid":"https://orcid.org/0009-0002-9474-7377"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Zheng","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application, Beijing, China","Beijing Institute of Computer Technology and Application, China"],"raw_orcid":"https://orcid.org/0009-0002-9474-7377","affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020383049","display_name":"Youlan Li","orcid":"https://orcid.org/0009-0000-6191-7232"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yining Li","raw_affiliation_strings":["College of Software Engineering, Southeast University, Nanjing, China","College of Software Engineering, Southeast University, China"],"raw_orcid":"https://orcid.org/0009-0000-6191-7232","affiliations":[{"raw_affiliation_string":"College of Software Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]},{"raw_affiliation_string":"College of Software Engineering, Southeast University, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hengyuan Xu","orcid":"https://orcid.org/0009-0001-0373-5956"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengyuan Xu","raw_affiliation_strings":["College of Software Engineering, Southeast University, Nanjing, China","College of Software Engineering, Southeast University, China"],"raw_orcid":"https://orcid.org/0009-0001-0373-5956","affiliations":[{"raw_affiliation_string":"College of Software Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]},{"raw_affiliation_string":"College of Software Engineering, Southeast University, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064564558","display_name":"Dong Nie","orcid":"https://orcid.org/0000-0003-0385-8988"},"institutions":[{"id":"https://openalex.org/I1743320","display_name":"Palo Alto University","ror":"https://ror.org/04f812k67","country_code":"US","type":"education","lineage":["https://openalex.org/I1743320"]},{"id":"https://openalex.org/I4210140397","display_name":"Metrica (United States)","ror":"https://ror.org/031ffw737","country_code":"US","type":"company","lineage":["https://openalex.org/I4210140397"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Nie","raw_affiliation_strings":["Meta Inc, Palo Alto, California, USA","Meta Inc, U.S"],"raw_orcid":"https://orcid.org/0000-0003-0385-8988","affiliations":[{"raw_affiliation_string":"Meta Inc, Palo Alto, California, USA","institution_ids":["https://openalex.org/I1743320"]},{"raw_affiliation_string":"Meta Inc, U.S","institution_ids":["https://openalex.org/I4210140397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058176560","display_name":"Peng Wang","orcid":"https://orcid.org/0000-0001-8782-857X"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Wang","raw_affiliation_strings":["School of Computer Science and Engineering, Southeast University, Nanjing, China and Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, Nanjing, China","School of Computer Science and Engineering, Southeast University, China and Key Laboratory of New Generation Artifcial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, China"],"raw_orcid":"https://orcid.org/0000-0001-8782-857X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, China and Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, China and Key Laboratory of New Generation Artifcial Intelligence Technology and Its Interdisciplinary Applications (Southeast University), Ministry of Education, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101081420","display_name":"Yao He","orcid":"https://orcid.org/0009-0002-3238-5884"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Yao He","raw_affiliation_strings":["Institute of Collaborative Innovation, University of Macau, Macau, China","Institute of Collaborative Innovation, University of Macau, China"],"raw_orcid":"https://orcid.org/0009-0002-3238-5884","affiliations":[{"raw_affiliation_string":"Institute of Collaborative Innovation, University of Macau, Macau, China","institution_ids":["https://openalex.org/I204512498"]},{"raw_affiliation_string":"Institute of Collaborative Innovation, University of Macau, China","institution_ids":["https://openalex.org/I204512498"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102753677"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":18.6027,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.99128855,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"44","issue":"1","first_page":"1","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5408999919891357},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5374000072479248},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.46630001068115234},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.3425000011920929},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.33550000190734863}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7371000051498413},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6092000007629395},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5408999919891357},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5374000072479248},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.46630001068115234},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C2767350","wikidata":"https://www.wikidata.org/wiki/Q6662173","display_name":"Business intelligence","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.30970001220703125},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.24150000512599945}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3768156","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3768156","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3740420203","display_name":null,"funder_award_id":"2242025K30024","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4168807297","display_name":null,"funder_award_id":"62376057","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":134,"referenced_works":["https://openalex.org/W128638292","https://openalex.org/W1965398296","https://openalex.org/W2047923155","https://openalex.org/W2062361515","https://openalex.org/W2095926335","https://openalex.org/W2133665775","https://openalex.org/W2141756865","https://openalex.org/W2142069714","https://openalex.org/W2143786659","https://openalex.org/W2193145675","https://openalex.org/W2598569220","https://openalex.org/W2623399293","https://openalex.org/W2694849690","https://openalex.org/W2912924812","https://openalex.org/W2963341956","https://openalex.org/W2963899988","https://openalex.org/W2964110616","https://openalex.org/W2979382951","https://openalex.org/W2980913015","https://openalex.org/W2997154779","https://openalex.org/W3003711898","https://openalex.org/W3009518609","https://openalex.org/W3027879771","https://openalex.org/W3082397598","https://openalex.org/W3085139254","https://openalex.org/W3092694106","https://openalex.org/W3098495697","https://openalex.org/W3099700870","https://openalex.org/W3102948621","https://openalex.org/W3104953317","https://openalex.org/W3106250896","https://openalex.org/W3111356309","https://openalex.org/W3113753692","https://openalex.org/W3116857864","https://openalex.org/W3134064484","https://openalex.org/W3138516171","https://openalex.org/W3154280800","https://openalex.org/W3156789018","https://openalex.org/W3161820423","https://openalex.org/W3167404434","https://openalex.org/W3169283738","https://openalex.org/W3174708387","https://openalex.org/W3201053014","https://openalex.org/W3201871940","https://openalex.org/W4205924151","https://openalex.org/W4210451781","https://openalex.org/W4210896998","https://openalex.org/W4213213306","https://openalex.org/W4225104598","https://openalex.org/W4225992558","https://openalex.org/W4252076394","https://openalex.org/W4281758439","https://openalex.org/W4285105124","https://openalex.org/W4285149002","https://openalex.org/W4285199320","https://openalex.org/W4285255856","https://openalex.org/W4287887100","https://openalex.org/W4288042030","https://openalex.org/W4288089799","https://openalex.org/W4290927927","https://openalex.org/W4294294581","https://openalex.org/W4304013646","https://openalex.org/W4307079201","https://openalex.org/W4312233877","https://openalex.org/W4362468670","https://openalex.org/W4382468776","https://openalex.org/W4383502937","https://openalex.org/W4384636790","https://openalex.org/W4384642600","https://openalex.org/W4385237494","https://openalex.org/W4385565351","https://openalex.org/W4385570519","https://openalex.org/W4385570645","https://openalex.org/W4385571271","https://openalex.org/W4385573236","https://openalex.org/W4385990930","https://openalex.org/W4386083123","https://openalex.org/W4386083279","https://openalex.org/W4386566488","https://openalex.org/W4386566590","https://openalex.org/W4386836900","https://openalex.org/W4388748032","https://openalex.org/W4388979610","https://openalex.org/W4389519118","https://openalex.org/W4389519153","https://openalex.org/W4389519226","https://openalex.org/W4389519972","https://openalex.org/W4389520155","https://openalex.org/W4389520758","https://openalex.org/W4389520779","https://openalex.org/W4389523667","https://openalex.org/W4389523718","https://openalex.org/W4389524107","https://openalex.org/W4389524473","https://openalex.org/W4389820972","https://openalex.org/W4389937026","https://openalex.org/W4390690436","https://openalex.org/W4390872501","https://openalex.org/W4390962494","https://openalex.org/W4391096228","https://openalex.org/W4391212494","https://openalex.org/W4391876619","https://openalex.org/W4392206030","https://openalex.org/W4392366638","https://openalex.org/W4392979952","https://openalex.org/W4393160827","https://openalex.org/W4393403588","https://openalex.org/W4395443988","https://openalex.org/W4396988255","https://openalex.org/W4398796934","https://openalex.org/W4399175313","https://openalex.org/W4399836022","https://openalex.org/W4401023643","https://openalex.org/W4401042735","https://openalex.org/W4401042914","https://openalex.org/W4402030074","https://openalex.org/W4402343071","https://openalex.org/W4402671152","https://openalex.org/W4402671806","https://openalex.org/W4402951581","https://openalex.org/W4403088703","https://openalex.org/W4403280574","https://openalex.org/W4404781101","https://openalex.org/W4404781349","https://openalex.org/W4405172832","https://openalex.org/W4405399726","https://openalex.org/W4405595839","https://openalex.org/W4407937981","https://openalex.org/W4408886719","https://openalex.org/W4410356941","https://openalex.org/W4411980533","https://openalex.org/W4412886856","https://openalex.org/W4412886888","https://openalex.org/W4413146877"],"related_works":[],"abstract_inverted_index":{"The":[0,95],"rapid":[1],"proliferation":[2],"of":[3,26,73,87,90],"documents":[4],"has":[5],"made":[6],"document":[7,27,35,93,109],"intelligence":[8],"increasingly":[9],"critical":[10,126],"across":[11],"various":[12],"industries.":[13],"In":[14],"recent":[15],"years,":[16],"Large":[17],"Language":[18],"Models":[19],"(LLMs)":[20],"have":[21,44],"dramatically":[22],"transformed":[23],"the":[24,88,112,137],"field":[25],"intelligence,":[28],"allowing":[29],"for":[30,108,128],"more":[31],"advanced":[32],"and":[33,56,80,105,121,131],"accurate":[34],"processing":[36],"solutions.":[37],"Despite":[38],"these":[39,49],"advancements,":[40],"most":[41],"existing":[42],"surveys":[43],"failed":[45],"to":[46,64,135],"focus":[47],"on":[48,53],"breakthroughs,":[50],"instead":[51],"concentrating":[52],"traditional":[54],"methods":[55],"earlier":[57],"machine":[58],"learning":[59],"techniques.":[60],"This":[61],"survey":[62,113],"seeks":[63],"fill":[65],"that":[66],"gap":[67],"by":[68],"offering":[69,125],"an":[70],"in-depth":[71],"analysis":[72],"approximately":[74],"300":[75],"papers":[76],"published":[77],"between":[78],"2021":[79],"mid-2025,":[81],"thus":[82],"providing":[83],"a":[84],"comprehensive":[85],"overview":[86],"impact":[89],"LLMs":[91,107],"in":[92],"intelligence.":[94],"key":[96],"topics":[97],"explored":[98],"include":[99],"Retrieval-Augmented":[100],"Generation":[101],"(RAG),":[102],"long-context":[103],"processing,":[104],"fine-tuning":[106],"comprehension.":[110],"Furthermore,":[111],"highlights":[114],"essential":[115],"datasets,":[116],"practical":[117],"applications,":[118],"current":[119],"challenges,":[120],"future":[122],"research":[123],"directions,":[124],"insights":[127],"both":[129],"researchers":[130],"industry":[132],"practitioners":[133],"looking":[134],"advance":[136],"field.":[138]},"counts_by_year":[{"year":2026,"cited_by_count":10}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
