{"id":"https://openalex.org/W3009005296","doi":"https://doi.org/10.1109/bigdata47090.2019.9006064","title":"Scalable Document Image Information Extraction with Application to Domain-Specific Analysis","display_name":"Scalable Document Image Information Extraction with Application to Domain-Specific Analysis","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3009005296","doi":"https://doi.org/10.1109/bigdata47090.2019.9006064","mag":"3009005296"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9006064","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056833841","display_name":"Yingbin Zheng","orcid":"https://orcid.org/0000-0002-5590-9292"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yingbin Zheng","raw_affiliation_strings":["Videt Tech Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Videt Tech Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001764386","display_name":"Shuchen Kong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuchen Kong","raw_affiliation_strings":["Videt Tech Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Videt Tech Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086624600","display_name":"Wanshan Zhu","orcid":"https://orcid.org/0000-0002-2037-2085"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wanshan Zhu","raw_affiliation_strings":["Videt Tech Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Videt Tech Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100427158","display_name":"Hao Ye","orcid":"https://orcid.org/0000-0002-5939-4708"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Ye","raw_affiliation_strings":["Videt Tech Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Videt Tech Ltd., Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056833841"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2044,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.57293982,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":"2019","issue":null,"first_page":"5108","last_page":"5115"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8814985156059265},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6982866525650024},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6784873604774475},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5975519418716431},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5801134705543518},{"id":"https://openalex.org/keywords/document-layout-analysis","display_name":"Document layout analysis","score":0.5430062413215637},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5389756560325623},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4897444248199463},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.44334176182746887},{"id":"https://openalex.org/keywords/keyword-extraction","display_name":"Keyword extraction","score":0.42840832471847534},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.42241063714027405},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.41080015897750854},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3954271078109741},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33536893129348755},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.297130286693573},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.22125980257987976},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19484570622444153},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.13816988468170166},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.09601083397865295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8814985156059265},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6982866525650024},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6784873604774475},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5975519418716431},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5801134705543518},{"id":"https://openalex.org/C72773152","wikidata":"https://www.wikidata.org/wiki/Q5287629","display_name":"Document layout analysis","level":3,"score":0.5430062413215637},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5389756560325623},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4897444248199463},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.44334176182746887},{"id":"https://openalex.org/C2780288562","wikidata":"https://www.wikidata.org/wiki/Q25053353","display_name":"Keyword extraction","level":2,"score":0.42840832471847534},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.42241063714027405},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.41080015897750854},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3954271078109741},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33536893129348755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.297130286693573},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22125980257987976},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19484570622444153},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.13816988468170166},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.09601083397865295},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9006064","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"mag:3040812949","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002252787847385","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7400000095367432,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1491389626","https://openalex.org/W1571689097","https://openalex.org/W1686810756","https://openalex.org/W1922126009","https://openalex.org/W1998042868","https://openalex.org/W2001642682","https://openalex.org/W2061802763","https://openalex.org/W2112692009","https://openalex.org/W2124404372","https://openalex.org/W2140567548","https://openalex.org/W2142159465","https://openalex.org/W2194187530","https://openalex.org/W2194775991","https://openalex.org/W2343052201","https://openalex.org/W2508450616","https://openalex.org/W2519818067","https://openalex.org/W2593539516","https://openalex.org/W2593572697","https://openalex.org/W2605982830","https://openalex.org/W2613718673","https://openalex.org/W2750938222","https://openalex.org/W2752225195","https://openalex.org/W2752782242","https://openalex.org/W2768926640","https://openalex.org/W2777652944","https://openalex.org/W2786345741","https://openalex.org/W2795619303","https://openalex.org/W2809273748","https://openalex.org/W2810028092","https://openalex.org/W2875814315","https://openalex.org/W2896457183","https://openalex.org/W2904601236","https://openalex.org/W2950136945","https://openalex.org/W2962835968","https://openalex.org/W2962986948","https://openalex.org/W2963327605","https://openalex.org/W2963341956","https://openalex.org/W2963420686","https://openalex.org/W2963647456","https://openalex.org/W2963687456","https://openalex.org/W2964018263","https://openalex.org/W2964296749","https://openalex.org/W2964346820","https://openalex.org/W3003336019","https://openalex.org/W3003552100","https://openalex.org/W3004408914","https://openalex.org/W3004846386","https://openalex.org/W3104637907","https://openalex.org/W3106228955","https://openalex.org/W3106271744","https://openalex.org/W6620707391","https://openalex.org/W6629590909","https://openalex.org/W6637373629","https://openalex.org/W6649973027","https://openalex.org/W6650820226","https://openalex.org/W6681153094","https://openalex.org/W6726857151","https://openalex.org/W6734085379","https://openalex.org/W6744310024","https://openalex.org/W6752143097","https://openalex.org/W6752534923","https://openalex.org/W6755207826","https://openalex.org/W6760811728","https://openalex.org/W6765837245","https://openalex.org/W6771062460"],"related_works":["https://openalex.org/W2025763381","https://openalex.org/W3162540683","https://openalex.org/W1971234693","https://openalex.org/W1487724385","https://openalex.org/W2132006538","https://openalex.org/W1534205747","https://openalex.org/W2037668370","https://openalex.org/W149061968","https://openalex.org/W2921578339","https://openalex.org/W840083456"],"abstract_inverted_index":{"Document":[0],"images":[1,74],"are":[2],"ubiquitous,":[3],"but":[4,13],"existing":[5],"methods":[6],"mainly":[7],"focus":[8],"on":[9],"the":[10,41,45,99],"text":[11,60],"reading":[12],"not":[14],"information":[15,26,78],"understanding.":[16],"In":[17],"this":[18],"paper,":[19],"we":[20,56],"propose":[21],"a":[22,65,76,95],"novel":[23,77],"document":[24,46,52,73,106],"image":[25],"extraction":[27,79],"framework":[28,88],"with":[29,81],"application":[30],"to":[31],"domain-specific":[32,105],"analysis.":[33,107],"Key":[34],"gains":[35],"of":[36,44,98],"our":[37],"system":[38],"result":[39],"from":[40],"modularized":[42],"implementation":[43],"analysis":[47,53],"modules":[48],"needed":[49,103],"for":[50,72],"different":[51],"problems.":[54],"Further,":[55],"provide":[57],"an":[58],"efficient":[59],"recognition":[61],"approach":[62],"that":[63],"makes":[64],"trade-off":[66],"between":[67],"performance":[68],"and":[69,75,84,91,93],"running":[70],"speed":[71],"method":[80],"both":[82],"visual":[83],"semantic":[85],"information.":[86],"Our":[87],"is":[89,102],"scalable":[90],"customizable,":[92],"only":[94],"few":[96],"annotations":[97],"keyword-content":[100],"mapping":[101],"towards":[104]},"counts_by_year":[{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
