{"id":"https://openalex.org/W3209734220","doi":"https://doi.org/10.1145/3459637.3482484","title":"Tabular Functional Block Detection with Embedding-based Agglomerative Cell Clustering","display_name":"Tabular Functional Block Detection with Embedding-based Agglomerative Cell Clustering","publication_year":2021,"publication_date":"2021-10-26","ids":{"openalex":"https://openalex.org/W3209734220","doi":"https://doi.org/10.1145/3459637.3482484","mag":"3209734220"},"language":"en","primary_location":{"id":"doi:10.1145/3459637.3482484","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459637.3482484","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3459637.3482484","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3459637.3482484","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036104236","display_name":"Kexuan Sun","orcid":"https://orcid.org/0000-0003-3261-7358"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kexuan Sun","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100455958","display_name":"Fei Wang","orcid":"https://orcid.org/0000-0003-3462-8472"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fei Wang","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102861481","display_name":"Muhao Chen","orcid":"https://orcid.org/0000-0003-0118-3147"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Muhao Chen","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073443117","display_name":"Jay Pujara","orcid":"https://orcid.org/0000-0001-6921-1744"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jay Pujara","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036104236"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":0.0961,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.40972222,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1744","last_page":"1753"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.786461591720581},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6731979846954346},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.6530968546867371},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6252602338790894},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.6194254159927368},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5617243647575378},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5542831420898438},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5456160306930542},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.492047518491745},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4151936173439026},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41251975297927856},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.24122273921966553},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1142238974571228}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.786461591720581},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6731979846954346},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.6530968546867371},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6252602338790894},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.6194254159927368},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5617243647575378},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5542831420898438},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5456160306930542},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.492047518491745},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4151936173439026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41251975297927856},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.24122273921966553},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1142238974571228},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3459637.3482484","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459637.3482484","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3459637.3482484","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3459637.3482484","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459637.3482484","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3459637.3482484","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5296461164","display_name":null,"funder_award_id":"FA8750-20-2-10002,FA8650-17-C-7715","funder_id":"https://openalex.org/F4320338294","funder_display_name":"Air Force Research Laboratory"},{"id":"https://openalex.org/G546011143","display_name":null,"funder_award_id":"W911NF-19-20271","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3209734220.pdf","grobid_xml":"https://content.openalex.org/works/W3209734220.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1495062271","https://openalex.org/W1599454686","https://openalex.org/W1903029394","https://openalex.org/W1911592683","https://openalex.org/W1974329121","https://openalex.org/W1988217119","https://openalex.org/W1990899722","https://openalex.org/W2031489346","https://openalex.org/W2032655922","https://openalex.org/W2048282669","https://openalex.org/W2104875837","https://openalex.org/W2106053110","https://openalex.org/W2117462434","https://openalex.org/W2143309843","https://openalex.org/W2153233077","https://openalex.org/W2444353601","https://openalex.org/W2550378979","https://openalex.org/W2892181857","https://openalex.org/W2901585524","https://openalex.org/W2963091658","https://openalex.org/W2963572185","https://openalex.org/W2963899988","https://openalex.org/W2968396469","https://openalex.org/W3014771378","https://openalex.org/W3015458397","https://openalex.org/W3035140194","https://openalex.org/W3035231859","https://openalex.org/W3047712904","https://openalex.org/W3153032435","https://openalex.org/W3157891451","https://openalex.org/W3202214569"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W4234886518","https://openalex.org/W2389591058","https://openalex.org/W2382112581","https://openalex.org/W3124036233","https://openalex.org/W4229787472","https://openalex.org/W2486541857","https://openalex.org/W2108840191","https://openalex.org/W2759366996","https://openalex.org/W2110679372"],"abstract_inverted_index":{"Tables":[0],"are":[1,38],"a":[2,59,99,128,163],"widely-used":[3],"format":[4],"for":[5,28,91,166,171],"data":[6,87],"curation.":[7],"The":[8],"diversity":[9,154],"of":[10,15,62,155,188],"domains,":[11],"layouts,":[12],"and":[13,47,76,135,148],"content":[14],"tables":[16,156],"makes":[17],"knowledge":[18,31],"extraction":[19],"challenging.":[20],"Understanding":[21],"table":[22,36,64,149],"layouts":[23],"is":[24,58],"an":[25],"important":[26],"step":[27],"automatically":[29,167],"harvesting":[30],"from":[32],"tabular":[33],"data.":[34],"Since":[35],"cells":[37],"spatially":[39],"organized":[40],"into":[41,114],"regions,":[42],"correctly":[43],"identifying":[44,92],"such":[45],"regions":[46],"inferring":[48],"their":[49],"functional":[50,55,67,101],"roles,":[51],"referred":[52],"to":[53,72,144],"as":[54,89,138,140],"block":[56,68,102,141],"detection,":[57],"critical":[60],"part":[61],"understanding":[63],"layouts.":[65],"Earlier":[66],"detection":[69,103],"approaches":[70],"fail":[71],"leverage":[73],"spatial":[74],"relationships":[75],"higher-level":[77],"structure,":[78],"either":[79],"depending":[80],"on":[81,86],"cell-level":[82],"predictions":[83],"or":[84],"relying":[85],"types":[88],"signals":[90],"blocks.":[93],"In":[94],"this":[95],"paper,":[96],"we":[97,160],"introduce":[98,162],"flexible":[100],"method":[104,123,179,185],"by":[105],"applying":[106],"agglomerative":[107],"clustering":[108],"techniques":[109],"which":[110,132],"merge":[111],"smaller":[112],"blocks":[113,116],"larger":[115],"using":[117],"two":[118],"merging":[119],"strategies.":[120],"Our":[121],"proposed":[122],"uses":[124],"cell":[125],"embeddings":[126],"with":[127],"customized":[129],"dissimilarity":[130],"function":[131],"utilizes":[133],"local":[134],"margin":[136],"distances,":[137],"well":[139],"coherence":[142],"metrics":[143],"capture":[145],"cell,":[146],"block,":[147],"scoped":[150],"features.":[151],"Given":[152],"the":[153,182],"in":[157,186],"real-world":[158],"corpora,":[159],"also":[161],"sampling-based":[164],"approach":[165],"tuning":[168],"distance":[169],"thresholds":[170],"each":[172],"table.":[173],"Experimental":[174],"results":[175],"show":[176],"that":[177],"our":[178],"improves":[180],"over":[181],"earlier":[183],"state-of-the-art":[184],"terms":[187],"several":[189],"evaluation":[190],"metrics.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
