{"id":"https://openalex.org/W2121506769","doi":"https://doi.org/10.1145/1284420.1284466","title":"Elimination of junk document surrogate candidates through pattern recognition","display_name":"Elimination of junk document surrogate candidates through pattern recognition","publication_year":2007,"publication_date":"2007-08-28","ids":{"openalex":"https://openalex.org/W2121506769","doi":"https://doi.org/10.1145/1284420.1284466","mag":"2121506769"},"language":"en","primary_location":{"id":"doi:10.1145/1284420.1284466","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1284420.1284466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM symposium on Document engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071007779","display_name":"Eunyee Koh","orcid":"https://orcid.org/0000-0003-2091-5972"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eunyee Koh","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071773219","display_name":"Daniel Caruso","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Caruso","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078871533","display_name":"Andruid Kerne","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andruid Kerne","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062423099","display_name":"Ricardo Guti\u00e9rrez\u2010Osuna","orcid":"https://orcid.org/0000-0003-2817-2085"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ricardo Gutierrez-Osuna","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX","Texas A&M University, College Station. TX#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX","institution_ids":["https://openalex.org/I91045830"]},{"raw_affiliation_string":"Texas A&M University, College Station. TX#TAB#","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071007779"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":0.8283,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.85045853,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"187","last_page":"195"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9732999801635742,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7350371479988098},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6542223691940308},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.633838415145874},{"id":"https://openalex.org/keywords/hypermedia","display_name":"Hypermedia","score":0.6321336030960083},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5709285736083984},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42469972372055054},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4168801009654999},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.26544642448425293},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12861499190330505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7350371479988098},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6542223691940308},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.633838415145874},{"id":"https://openalex.org/C2780126544","wikidata":"https://www.wikidata.org/wiki/Q837900","display_name":"Hypermedia","level":2,"score":0.6321336030960083},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5709285736083984},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42469972372055054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4168801009654999},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.26544642448425293},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12861499190330505}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1284420.1284466","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1284420.1284466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM symposium on Document engineering","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.134.5322","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.134.5322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.cs.tamu.edu/prism/publications/doceng07_koh.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.410.4509","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.410.4509","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ecologylab.net/research/publications/kohKerneDocEng07.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W202150751","https://openalex.org/W1554663460","https://openalex.org/W1802760973","https://openalex.org/W1988935260","https://openalex.org/W2002444211","https://openalex.org/W2007525532","https://openalex.org/W2017553407","https://openalex.org/W2019264297","https://openalex.org/W2029058827","https://openalex.org/W2059216172","https://openalex.org/W2062620222","https://openalex.org/W2074663912","https://openalex.org/W2089477727","https://openalex.org/W2112162759","https://openalex.org/W2120615849","https://openalex.org/W2124776405","https://openalex.org/W2128422191","https://openalex.org/W2139921367","https://openalex.org/W2150721933","https://openalex.org/W2158364734","https://openalex.org/W2160196229","https://openalex.org/W2161387974","https://openalex.org/W2169933757","https://openalex.org/W2285257517","https://openalex.org/W2296695996","https://openalex.org/W2799061466","https://openalex.org/W3146003712","https://openalex.org/W4230663420","https://openalex.org/W4298290070"],"related_works":["https://openalex.org/W1986400483","https://openalex.org/W2755139859","https://openalex.org/W2114841919","https://openalex.org/W2021446321","https://openalex.org/W2118942077","https://openalex.org/W2264492131","https://openalex.org/W2029270702","https://openalex.org/W37150012","https://openalex.org/W1997004243","https://openalex.org/W1557661861"],"abstract_inverted_index":{"A":[0],"surrogate":[1,62,67],"is":[2,17,60],"an":[3,70],"object":[4],"that":[5,14,28],"stands":[6],"for":[7],"a":[8,48,52,61],"document":[9,49],"and":[10,30,39,87],"enables":[11],"navigation":[12,85],"to":[13],"document.":[15],"Hypermedia":[16],"often":[18],"represented":[19],"with":[20,78],"textual":[21],"surrogates,":[22],"even":[23],"though":[24],"studies":[25],"have":[26],"shown":[27],"image":[29],"text":[31],"surrogates":[32],"facilitate":[33],"the":[34],"formation":[35],"of":[36,54,58],"mental":[37],"models":[38],"overall":[40],"understanding.":[41],"Surrogates":[42],"may":[43,75],"be":[44],"formed":[45],"by":[46],"breaking":[47],"down":[50],"into":[51],"set":[53],"smaller":[55],"elements,":[56],"each":[57],"which":[59],"candidate.":[63],"While":[64],"processing":[65],"these":[66],"candidates":[68],"from":[69],"HTML":[71],"document,":[72],"relevant":[73],"information":[74],"appear":[76],"together":[77],"less":[79],"useful":[80],"junk":[81],"material,":[82],"such":[83],"as":[84],"bars":[86],"advertisements.":[88]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
