{"id":"https://openalex.org/W3010230221","doi":"https://doi.org/10.1109/wacv45572.2020.9093376","title":"Multi-Modal Association based Grouping for Form Structure Extraction","display_name":"Multi-Modal Association based Grouping for Form Structure Extraction","publication_year":2020,"publication_date":"2020-03-01","ids":{"openalex":"https://openalex.org/W3010230221","doi":"https://doi.org/10.1109/wacv45572.2020.9093376","mag":"3010230221"},"language":"en","primary_location":{"id":"doi:10.1109/wacv45572.2020.9093376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087749825","display_name":"Milan Aggarwal","orcid":"https://orcid.org/0000-0001-6246-9750"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Milan Aggarwal","raw_affiliation_strings":["Media and Data Science Research Lab, Adobe"],"affiliations":[{"raw_affiliation_string":"Media and Data Science Research Lab, Adobe","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112660758","display_name":"Mausoom Sarkar","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mausoom Sarkar","raw_affiliation_strings":["Media and Data Science Research Lab, Adobe"],"affiliations":[{"raw_affiliation_string":"Media and Data Science Research Lab, Adobe","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026139045","display_name":"Hiresh Gupta","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hiresh Gupta","raw_affiliation_strings":["Adobe Experience Cloud"],"affiliations":[{"raw_affiliation_string":"Adobe Experience Cloud","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081562101","display_name":"Balaji Krishnamurthy","orcid":"https://orcid.org/0000-0003-0464-536X"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Balaji Krishnamurthy","raw_affiliation_strings":["Media and Data Science Research Lab, Adobe"],"affiliations":[{"raw_affiliation_string":"Media and Data Science Research Lab, Adobe","institution_ids":["https://openalex.org/I1306409833"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087749825"],"corresponding_institution_ids":["https://openalex.org/I1306409833"],"apc_list":null,"apc_paid":null,"fwci":0.7816,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.73349802,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2064","last_page":"2073"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7753216624259949},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.652406632900238},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6313130855560303},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5917506217956543},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5669684410095215},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5599998235702515},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5361412167549133},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5226335525512695},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5024726390838623},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4716476798057556},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4311484098434448},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.4185613691806793},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4135192632675171},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.14843684434890747}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7753216624259949},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.652406632900238},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6313130855560303},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5917506217956543},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5669684410095215},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5599998235702515},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5361412167549133},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5226335525512695},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5024726390838623},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4716476798057556},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4311484098434448},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.4185613691806793},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4135192632675171},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.14843684434890747},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv45572.2020.9093376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6499999761581421,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1545568302","https://openalex.org/W1571689097","https://openalex.org/W1903029394","https://openalex.org/W1940872118","https://openalex.org/W2016589492","https://openalex.org/W2064675550","https://openalex.org/W2077701230","https://openalex.org/W2099926976","https://openalex.org/W2112692009","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2153788094","https://openalex.org/W2163534337","https://openalex.org/W2444353601","https://openalex.org/W2470673105","https://openalex.org/W2493916176","https://openalex.org/W2606613765","https://openalex.org/W2612445135","https://openalex.org/W2768926640","https://openalex.org/W2786480153","https://openalex.org/W2787202124","https://openalex.org/W2795424778","https://openalex.org/W2891117443","https://openalex.org/W2895359355","https://openalex.org/W2922714365","https://openalex.org/W2947372801","https://openalex.org/W2963136605","https://openalex.org/W2963542991","https://openalex.org/W2963887528","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W2964309882","https://openalex.org/W2964346820","https://openalex.org/W2998913931","https://openalex.org/W3013022628","https://openalex.org/W3104049173","https://openalex.org/W3104637907","https://openalex.org/W4297775537","https://openalex.org/W6634379806","https://openalex.org/W6679436768","https://openalex.org/W6683992317","https://openalex.org/W6748481559"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W2087343574","https://openalex.org/W2890372105"],"abstract_inverted_index":{"Document":[0],"structure":[1,24,43],"extraction":[2],"has":[3,16],"been":[4,17],"a":[5,37,80,106,124,166],"widely":[6],"researched":[7],"area":[8],"for":[9,41,70,174],"decades.":[10],"Recent":[11],"work":[12],"in":[13,73],"this":[14,33,131],"direction":[15],"deep":[18],"learning-based,":[19],"mostly":[20],"focusing":[21],"on":[22],"extracting":[23],"using":[25,196],"fully":[26],"convolution":[27],"NN":[28],"through":[29,105,123,153,191],"semantic":[30,180],"segmentation.":[31],"In":[32],"work,":[34],"we":[35,53,78],"present":[36],"novel":[38],"multi-modal":[39],"approach":[40,164],"form":[42],"extraction.":[44],"Given":[45],"simple":[46],"elements":[47,92],"such":[48,57],"as":[49,58],"textruns":[50],"and":[51,64,99,112,142,172],"widgets,":[52],"extract":[54],"higher-order":[55],"structures":[56,152],"TextBlocks,":[59],"Text":[60],"Fields,":[61,63],"Choice":[62,65],"Groups,":[66],"which":[67],"are":[68,147],"essential":[69],"information":[71],"collection":[72],"forms.":[74],"To":[75],"achieve":[76],"this,":[77],"obtain":[79,109],"local":[81],"image":[82,116],"patch":[83,117],"around":[84],"each":[85],"low-level":[86],"element":[87],"(reference)":[88],"by":[89,120],"identifying":[90],"candidate":[91],"closest":[93],"to":[94,108,135,149],"it.":[95],"We":[96,184,199],"process":[97],"textual":[98],"spatial":[100],"representation":[101],"of":[102,162,168,188],"candidates":[103],"sequentially":[104],"BiLSTM":[107],"context-aware":[110],"representations":[111],"fuse":[113],"them":[114],"with":[115],"features":[118],"obtained":[119],"processing":[121],"it":[122,194],"CNN.":[125],"Subsequently,":[126],"the":[127,137,160,175,186],"sequential":[128],"decoder":[129],"takes":[130],"fused":[132],"feature":[133],"vector":[134],"predict":[136],"association":[138],"type":[139],"between":[140],"reference":[141],"candidates.":[143],"These":[144],"predicted":[145],"associations":[146],"utilized":[148],"determine":[150],"larger":[151],"connected":[154],"components":[155],"analysis.":[156],"Experimental":[157],"results":[158],"show":[159,185],"effectiveness":[161],"our":[163,189,202],"achieving":[165],"recall":[167],"90.29%,":[169],"73.80%,":[170],"83.12%,":[171],"52.72%":[173],"above":[176],"structures,":[177],"respectively,":[178],"outperforming":[179],"segmentation":[181],"baselines":[182],"significantly.":[183],"efficacy":[187],"method":[190],"ablations,":[192],"comparing":[193],"against":[195],"individual":[197],"modalities.":[198],"also":[200],"introduce":[201],"new":[203],"rich":[204],"human-annotated":[205],"Forms":[206],"Dataset.":[207]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
