{"id":"https://openalex.org/W2110412794","doi":"https://doi.org/10.1145/1815330.1815353","title":"Information extraction by finding repeated structure","display_name":"Information extraction by finding repeated structure","publication_year":2010,"publication_date":"2010-06-09","ids":{"openalex":"https://openalex.org/W2110412794","doi":"https://doi.org/10.1145/1815330.1815353","mag":"2110412794"},"language":"en","primary_location":{"id":"doi:10.1145/1815330.1815353","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1815330.1815353","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th IAPR International Workshop on Document Analysis Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111847732","display_name":"Evgeniy Bart","orcid":null},"institutions":[{"id":"https://openalex.org/I173498003","display_name":"Palo Alto Research Center","ror":"https://ror.org/0529fxt39","country_code":"US","type":"facility","lineage":["https://openalex.org/I173498003","https://openalex.org/I4210132870"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Evgeniy Bart","raw_affiliation_strings":["Palo Alto Research Center, Palo Alto, CA","Palo Alto Research Center, Palo Alto, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Palo Alto Research Center, Palo Alto, CA","institution_ids":["https://openalex.org/I173498003"]},{"raw_affiliation_string":"Palo Alto Research Center, Palo Alto, CA#TAB#","institution_ids":["https://openalex.org/I173498003"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109230368","display_name":"Prateek Sarkar","orcid":null},"institutions":[{"id":"https://openalex.org/I173498003","display_name":"Palo Alto Research Center","ror":"https://ror.org/0529fxt39","country_code":"US","type":"facility","lineage":["https://openalex.org/I173498003","https://openalex.org/I4210132870"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prateek Sarkar","raw_affiliation_strings":["Palo Alto Research Center, Palo Alto, CA","Palo Alto Research Center, Palo Alto, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Palo Alto Research Center, Palo Alto, CA","institution_ids":["https://openalex.org/I173498003"]},{"raw_affiliation_string":"Palo Alto Research Center, Palo Alto, CA#TAB#","institution_ids":["https://openalex.org/I173498003"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5111847732"],"corresponding_institution_ids":["https://openalex.org/I173498003"],"apc_list":null,"apc_paid":null,"fwci":1.6162,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.85384663,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.758028507232666},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6673609018325806},{"id":"https://openalex.org/keywords/repetition","display_name":"Repetition (rhetorical device)","score":0.6634817719459534},{"id":"https://openalex.org/keywords/information-structure","display_name":"Information structure","score":0.6497830748558044},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5587888360023499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.514183759689331},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.481525719165802},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.47486162185668945},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4355233311653137},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40599945187568665},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34823375940322876},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33464741706848145},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.32687926292419434}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.758028507232666},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6673609018325806},{"id":"https://openalex.org/C2776141515","wikidata":"https://www.wikidata.org/wiki/Q1274479","display_name":"Repetition (rhetorical device)","level":2,"score":0.6634817719459534},{"id":"https://openalex.org/C2779954242","wikidata":"https://www.wikidata.org/wiki/Q6031227","display_name":"Information structure","level":2,"score":0.6497830748558044},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5587888360023499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.514183759689331},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.481525719165802},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.47486162185668945},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4355233311653137},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40599945187568665},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34823375940322876},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33464741706848145},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32687926292419434},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1815330.1815353","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1815330.1815353","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th IAPR International Workshop on Document Analysis Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.682.357","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.682.357","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.parc.com/content/attachments/information-extraction-finding.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1506806321","https://openalex.org/W1537287887","https://openalex.org/W1537936980","https://openalex.org/W1663973292","https://openalex.org/W1817561967","https://openalex.org/W1985617834","https://openalex.org/W2121457610","https://openalex.org/W2147800461","https://openalex.org/W2154422044","https://openalex.org/W2163166770","https://openalex.org/W2167828171","https://openalex.org/W2171090776"],"related_works":["https://openalex.org/W2996195527","https://openalex.org/W2978375718","https://openalex.org/W2612358220","https://openalex.org/W2351132524","https://openalex.org/W2916738897","https://openalex.org/W2392934913","https://openalex.org/W2801329620","https://openalex.org/W2003474770","https://openalex.org/W2163814182","https://openalex.org/W4243403709"],"abstract_inverted_index":{"Repetition":[0],"of":[1,21,36,65,77,91,137,147],"layout":[2],"structure":[3,20,57,67,79,139,148],"is":[4,100,123],"prevalent":[5],"in":[6,26,43,80,85,109,120],"document":[7,10,83],"images.":[8],"In":[9],"design,":[11],"such":[12,55],"repetition":[13],"conveys":[14],"the":[15,22,28,66,71,81],"underlying":[16],"logical":[17],"and":[18,33,84,98,113,125,140],"functional":[19],"data.":[23],"For":[24],"example,":[25],"invoices,":[27],"names,":[29],"unit":[30],"prices,":[31],"quantities":[32],"other":[34],"descriptors":[35],"every":[37],"line":[38],"item":[39],"are":[40,107],"laid":[41],"out":[42],"a":[44,50,62,110,114,144],"consistent":[45],"spatial":[46],"structure.":[47],"We":[48,127],"propose":[49],"general":[51],"method":[52,73,131],"for":[53,102,117],"extracting":[54],"repeated":[56,138],"from":[58],"documents.":[59,87],"After":[60],"receiving":[61],"single":[63],"example":[64],"to":[68],"be":[69],"found,":[70],"proposed":[72,124],"localizes":[74],"additional":[75,86],"instances":[76,136],"this":[78,103,121,130],"same":[82],"A":[88],"wide":[89,145],"variety":[90],"perceptually":[92],"motivated":[93],"cues":[94,106],"(such":[95],"as":[96],"alignment":[97],"saliency)":[99],"used":[101],"purpose.":[104],"These":[105],"combined":[108],"probabilistic":[111],"model,":[112],"novel":[115],"algorithm":[116],"exact":[118],"inference":[119],"model":[122],"used.":[126],"demonstrate":[128],"that":[129],"can":[132],"cope":[133],"with":[134],"complex":[135],"generalizes":[141],"successfully":[142],"across":[143],"range":[146],"variations.":[149]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
