{"id":"https://openalex.org/W1966382373","doi":"https://doi.org/10.1145/1148170.1148307","title":"Building a test collection for complex document information processing","display_name":"Building a test collection for complex document information processing","publication_year":2006,"publication_date":"2006-08-06","ids":{"openalex":"https://openalex.org/W1966382373","doi":"https://doi.org/10.1145/1148170.1148307","mag":"1966382373"},"language":"en","primary_location":{"id":"doi:10.1145/1148170.1148307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1148170.1148307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020081385","display_name":"David Lewis","orcid":"https://orcid.org/0000-0002-3503-4644"},"institutions":[{"id":"https://openalex.org/I4210148173","display_name":"Morgan, Lewis & Bockius (United States)","ror":"https://ror.org/04c0z3e83","country_code":"US","type":"company","lineage":["https://openalex.org/I4210148173"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"D. Lewis","raw_affiliation_strings":["David D. Lewis Consulting Chicago, IL"],"affiliations":[{"raw_affiliation_string":"David D. Lewis Consulting Chicago, IL","institution_ids":["https://openalex.org/I4210148173"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023698748","display_name":"Gady Agam","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"G. Agam","raw_affiliation_strings":["Illinois Institute of Technology Chicago, IL",", Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology Chicago, IL","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":", Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013934685","display_name":"Shlomo Argamon","orcid":"https://orcid.org/0000-0001-8699-324X"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S. Argamon","raw_affiliation_strings":["Illinois Institute of Technology Chicago, IL",", Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology Chicago, IL","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":", Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062591304","display_name":"Ophir Frieder","orcid":"https://orcid.org/0000-0001-5076-8171"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"O. Frieder","raw_affiliation_strings":["Illinois Institute of Technology Chicago, IL",", Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology Chicago, IL","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":", Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109992497","display_name":"D. Grossman","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"D. Grossman","raw_affiliation_strings":["Illinois Institute of Technology Chicago, IL",", Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology Chicago, IL","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":", Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112626360","display_name":"Jefferson Heard","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Heard","raw_affiliation_strings":["Illinois Institute of Technology Chicago, IL",", Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology Chicago, IL","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":", Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020081385"],"corresponding_institution_ids":["https://openalex.org/I4210148173"],"apc_list":null,"apc_paid":null,"fwci":5.0588,"has_fulltext":false,"cited_by_count":281,"citation_normalized_percentile":{"value":0.95710198,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"665","last_page":"666"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.941100001335144,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8090343475341797},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6637719869613647},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.6557822227478027},{"id":"https://openalex.org/keywords/terabyte","display_name":"Terabyte","score":0.6169714331626892},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.5545746088027954},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.5313314199447632},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.5223756432533264},{"id":"https://openalex.org/keywords/document-processing","display_name":"Document processing","score":0.492749959230423},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.46328204870224},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4167071580886841},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32580268383026123},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26858043670654297}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8090343475341797},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6637719869613647},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.6557822227478027},{"id":"https://openalex.org/C199683683","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Terabyte","level":2,"score":0.6169714331626892},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.5545746088027954},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.5313314199447632},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.5223756432533264},{"id":"https://openalex.org/C67905146","wikidata":"https://www.wikidata.org/wiki/Q5287646","display_name":"Document processing","level":2,"score":0.492749959230423},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.46328204870224},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4167071580886841},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32580268383026123},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26858043670654297},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1148170.1148307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1148170.1148307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:touroscholar.touro.edu:president_pubs-1244","is_oa":false,"landing_page_url":"https://touroscholar.touro.edu/president_pubs/244","pdf_url":null,"source":{"id":"https://openalex.org/S4377196437","display_name":"Touro Scholar (Touro College)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200437953","host_organization_name":"Touro College","host_organization_lineage":["https://openalex.org/I200437953"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Office of the President Publications and Research","raw_type":"conference"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.145.770","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.145.770","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ir.iit.edu/publications/downloads/sigir06cdipcoll_v05-with-authors.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2044830896","https://openalex.org/W2054403892","https://openalex.org/W2070836593"],"related_works":["https://openalex.org/W2218402054","https://openalex.org/W2096747878","https://openalex.org/W4383988422","https://openalex.org/W2168844671","https://openalex.org/W4306157992","https://openalex.org/W4385079034","https://openalex.org/W4320027814","https://openalex.org/W4319431489","https://openalex.org/W2000372441","https://openalex.org/W2904601236"],"abstract_inverted_index":{"Research":[0],"and":[1,24,38,67,86],"development":[2],"of":[3,17,21,28,40,42,55],"information":[4,60],"access":[5],"technology":[6],"for":[7,36],"scanned":[8],"paper":[9],"documents":[10],"has":[11],"been":[12],"hampered":[13],"by":[14],"the":[15],"lack":[16],"public":[18],"test":[19],"collections":[20],"realistic":[22],"scope":[23],"complexity.":[25],"As":[26],"part":[27],"a":[29,33,48],"project":[30],"to":[31,52],"create":[32],"prototype":[34],"system":[35],"search":[37],"mining":[39],"masses":[41],"document":[43,59,81],"images,":[44],"we":[45],"are":[46],"assembling":[47],"1.5":[49],"terabyte":[50],"dataset":[51],"support":[53],"evaluation":[54],"both":[56],"end-to-end":[57],"complex":[58],"processing":[61],"(CDIP)":[62],"tasks":[63],"(e.g.,":[64],"text":[65],"retrieval":[66],"data":[68],"mining)":[69],"as":[70,72,76],"well":[71],"component":[73],"technologies":[74],"such":[75],"optical":[77],"character":[78],"recognition":[79],"(OCR),":[80],"structure":[82],"analysis,":[83],"signature":[84],"matching,":[85],"authorship":[87],"attribution.":[88]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":29},{"year":2023,"cited_by_count":46},{"year":2022,"cited_by_count":35},{"year":2021,"cited_by_count":28},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":15},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":9},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
