{"id":"https://openalex.org/W7118783085","doi":"https://doi.org/10.48550/arxiv.2601.01897","title":"A Hybrid Architecture for Multi-Stage Claim Document Understanding: Combining Vision-Language Models and Machine Learning for Real-Time Processing","display_name":"A Hybrid Architecture for Multi-Stage Claim Document Understanding: Combining Vision-Language Models and Machine Learning for Real-Time Processing","publication_year":2026,"publication_date":"2026-01-05","ids":{"openalex":"https://openalex.org/W7118783085","doi":"https://doi.org/10.48550/arxiv.2601.01897"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.01897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.01897","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5095109074","display_name":"Lilu Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cheng, Lilu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068091318","display_name":"Jingjun Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Jingjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122131163","display_name":"Yi Xuan Chan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chan, Yi Xuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080009711","display_name":"Qu\u1ed1c Kh\u1ea3i Nguy\u1ec5n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Quoc Khai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122088988","display_name":"John Bi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bi, John","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035290663","display_name":"Sean Ho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Sean","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5095109074"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9467999935150146,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9467999935150146,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.003800000064074993,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6208000183105469},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.4528000056743622},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.42239999771118164},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.39570000767707825},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3659999966621399},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.3646000027656555},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3538999855518341},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.32269999384880066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8295000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7017999887466431},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6208000183105469},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6100000143051147},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.4528000056743622},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.42239999771118164},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.3646000027656555},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.32269999384880066},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C2779500292","wikidata":"https://www.wikidata.org/wiki/Q14802672","display_name":"Text processing","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2831999957561493},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2639000117778778},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.01897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.01897","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01897","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Claims":[0],"documents":[1,20],"are":[2,21],"fundamental":[3],"to":[4,48,107,144,189],"healthcare":[5],"and":[6,16,91,100,110,136,147,167,223,238,250],"insurance":[7],"operations,":[8],"serving":[9],"as":[10,29,52,54],"the":[11,79,123],"basis":[12],"for":[13,225],"reimbursement,":[14],"auditing,":[15],"compliance.":[17],"However,":[18],"these":[19],"typically":[22,193],"not":[23],"born":[24],"digital;":[25],"they":[26,39],"often":[27],"exist":[28],"scanned":[30],"PDFs":[31],"or":[32],"photographs":[33],"captured":[34],"under":[35,183],"uncontrolled":[36],"conditions.":[37],"Consequently,":[38],"exhibit":[40],"significant":[41,105],"content":[42],"heterogeneity,":[43],"ranging":[44],"from":[45,151,248],"typed":[46],"invoices":[47],"handwritten":[49],"medical":[50],"reports,":[51],"well":[53],"linguistic":[55],"diversity.":[56],"This":[57,114],"challenge":[58],"is":[59,239],"exemplified":[60],"by":[61],"operations":[62],"at":[63],"Fullerton":[64],"Health,":[65],"which":[66,192],"handles":[67],"tens":[68,242],"of":[69,71,163,172,182,243,245],"millions":[70],"claims":[72,153,246],"annually":[73],"across":[74],"nine":[75],"markets,":[76],"including":[77],"Singapore,":[78],"Philippines,":[80],"Indonesia,":[81],"Malaysia,":[82],"Mainland":[83],"China,":[84],"Hong":[85],"Kong,":[86],"Vietnam,":[87],"Papua":[88],"New":[89],"Guinea,":[90],"Cambodia.":[92],"Such":[93],"variability,":[94],"coupled":[95],"with":[96,217],"inconsistent":[97],"image":[98],"quality":[99],"diverse":[101],"layouts,":[102],"poses":[103],"a":[104,117,131,137,159,168,203],"obstacle":[106],"automated":[108],"parsing":[109],"structured":[111],"information":[112],"extraction.":[113],"paper":[115],"presents":[116],"robust":[118],"multi-stage":[119],"pipeline":[120],"that":[121,211],"integrates":[122],"multilingual":[124],"optical":[125],"character":[126],"recognition":[127],"(OCR)":[128],"engine":[129],"PaddleOCR,":[130],"traditional":[132,213],"Logistic":[133],"Regression":[134],"classifier,":[135],"compact":[138],"Vision-Language":[139],"Model":[140],"(VLM),":[141],"Qwen":[142],"2.5-VL-7B,":[143],"achieve":[145],"efficient":[146],"accurate":[148],"field":[149],"extraction":[150,170],"large-scale":[152],"data.":[154],"The":[155,228],"proposed":[156],"system":[157,201],"achieves":[158],"document-type":[160],"classification":[161],"accuracy":[162,171,222],"over":[164],"95":[165],"percent":[166],"field-level":[169],"approximately":[173],"87":[174],"percent,":[175],"while":[176],"maintaining":[177],"an":[178],"average":[179],"processing":[180,241],"latency":[181],"2":[184],"seconds":[185],"per":[186,198],"document.":[187],"Compared":[188],"manual":[190],"processing,":[191],"requires":[194],"around":[195],"10":[196],"minutes":[197],"claim,":[199],"our":[200,235],"delivers":[202],"300x":[204],"improvement":[205],"in":[206,234],"efficiency.":[207],"These":[208],"results":[209],"demonstrate":[210],"combining":[212],"machine":[214],"learning":[215],"models":[216],"modern":[218],"VLMs":[219],"enables":[220],"production-grade":[221],"speed":[224],"real-world":[226],"automation.":[227],"solution":[229],"has":[230],"been":[231],"successfully":[232],"deployed":[233],"mobile":[236],"application":[237],"currently":[240],"thousands":[244],"weekly":[247],"Vietnam":[249],"Singapore.":[251]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
