{"id":"https://openalex.org/W2061431794","doi":"https://doi.org/10.3390/a5040490","title":"The Effects of Tabular-Based Content Extraction on Patent Document Clustering","display_name":"The Effects of Tabular-Based Content Extraction on Patent Document Clustering","publication_year":2012,"publication_date":"2012-10-22","ids":{"openalex":"https://openalex.org/W2061431794","doi":"https://doi.org/10.3390/a5040490","mag":"2061431794"},"language":"en","primary_location":{"id":"doi:10.3390/a5040490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a5040490","pdf_url":"https://www.mdpi.com/1999-4893/5/4/490/pdf?version=1350888938","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/5/4/490/pdf?version=1350888938","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055612337","display_name":"Denise R. Koessler","orcid":null},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Denise R. Koessler","raw_affiliation_strings":["EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA"],"affiliations":[{"raw_affiliation_string":"EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031262684","display_name":"B. W. Martin","orcid":"https://orcid.org/0000-0003-3098-6053"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin W. Martin","raw_affiliation_strings":["EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA"],"affiliations":[{"raw_affiliation_string":"EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056465968","display_name":"Bruce E. Kiefer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bruce E. Kiefer","raw_affiliation_strings":["Catalyst Repository Systems, 1860 Blake Street, 7th Floor, Denver, CO 80202, USA"],"affiliations":[{"raw_affiliation_string":"Catalyst Repository Systems, 1860 Blake Street, 7th Floor, Denver, CO 80202, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075015473","display_name":"Michael W. Berry","orcid":"https://orcid.org/0000-0002-9191-9148"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael W. Berry","raw_affiliation_strings":["EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA"],"affiliations":[{"raw_affiliation_string":"EECS Department, Min H. Kao Building Suite 401, University of Tennessee, 1520 Middle Drive, Knoxville, TN 37996, USA","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075015473"],"corresponding_institution_ids":["https://openalex.org/I75027704"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19169745,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"5","issue":"4","first_page":"490","last_page":"505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10856","display_name":"Intellectual Property and Patents","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10856","display_name":"Intellectual Property and Patents","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8275160789489746},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7969844937324524},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7566903829574585},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.7159630060195923},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6283588409423828},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6050112247467041},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5747448801994324},{"id":"https://openalex.org/keywords/trademark","display_name":"Trademark","score":0.549464762210846},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5493471622467041},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.508868932723999},{"id":"https://openalex.org/keywords/metadata-repository","display_name":"Metadata repository","score":0.47812214493751526},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.47521668672561646},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4672866463661194},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.4429875612258911},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.42404043674468994},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4044559895992279},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2962037920951843},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.1886962652206421},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1484537124633789},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12462562322616577}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8275160789489746},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7969844937324524},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7566903829574585},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.7159630060195923},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6283588409423828},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6050112247467041},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5747448801994324},{"id":"https://openalex.org/C2779027411","wikidata":"https://www.wikidata.org/wiki/Q167270","display_name":"Trademark","level":2,"score":0.549464762210846},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5493471622467041},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.508868932723999},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.47812214493751526},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.47521668672561646},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4672866463661194},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.4429875612258911},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.42404043674468994},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4044559895992279},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2962037920951843},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.1886962652206421},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1484537124633789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12462562322616577},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/a5040490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a5040490","pdf_url":"https://www.mdpi.com/1999-4893/5/4/490/pdf?version=1350888938","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:10f438ba56c3429c8d548b514e025bba","is_oa":true,"landing_page_url":"https://doaj.org/article/10f438ba56c3429c8d548b514e025bba","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 5, Iss 4, Pp 490-505 (2012)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a5040490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a5040490","pdf_url":"https://www.mdpi.com/1999-4893/5/4/490/pdf?version=1350888938","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2061431794.pdf","grobid_xml":"https://content.openalex.org/works/W2061431794.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W133276960","https://openalex.org/W143345331","https://openalex.org/W1498393282","https://openalex.org/W1868866275","https://openalex.org/W1982114217","https://openalex.org/W2008735451","https://openalex.org/W2016607116","https://openalex.org/W2021795960","https://openalex.org/W2024932032","https://openalex.org/W2030089308","https://openalex.org/W2062418945","https://openalex.org/W2105504191","https://openalex.org/W2132313828","https://openalex.org/W2158903965","https://openalex.org/W2479578310","https://openalex.org/W4213417150","https://openalex.org/W6605419406","https://openalex.org/W6654320364","https://openalex.org/W6655863335"],"related_works":["https://openalex.org/W2145036943","https://openalex.org/W2000031603","https://openalex.org/W2027019938","https://openalex.org/W2079058854","https://openalex.org/W2097605975","https://openalex.org/W181681892","https://openalex.org/W309385283","https://openalex.org/W1518053583","https://openalex.org/W2141958537","https://openalex.org/W2102270039"],"abstract_inverted_index":{"Data":[0],"can":[1,33,83],"be":[2,34,84],"represented":[3],"in":[4,48,134],"many":[5],"different":[6],"ways":[7],"within":[8,141],"a":[9,74,103,126,142],"particular":[10],"document":[11,31,79,95,108,139],"or":[12,25],"set":[13],"of":[14,29,105,138],"documents.":[15],"Hence,":[16],"attempts":[17],"to":[18,43,77,102,125],"automatically":[19,44],"process":[20],"the":[21,27,55,68,106,129,136],"relationships":[22],"between":[23],"documents":[24],"determine":[26],"relevance":[28],"certain":[30],"objects":[32,46,65,140],"problematic.":[35],"In":[36],"this":[37],"study,":[38],"we":[39],"have":[40],"developed":[41],"software":[42,69],"catalog":[45],"contained":[47],"HTML":[49],"files":[50],"for":[51,89,131],"patents":[52],"granted":[53],"by":[54],"United":[56],"States":[57],"Patent":[58],"and":[59,87,97,117],"Trademark":[60],"Office":[61],"(USPTO).":[62],"Once":[63],"these":[64],"are":[66],"recognized,":[67],"creates":[70],"metadata":[71,82],"that":[72,115],"assigns":[73],"data":[75,119],"type":[76],"each":[78],"object.":[80],"Such":[81],"easily":[85],"processed":[86],"analyzed":[88],"subsequent":[90],"text":[91],"mining":[92],"tasks.":[93],"Specifically,":[94],"similarity":[96],"clustering":[98],"techniques":[99],"were":[100],"applied":[101],"subset":[104],"USPTO":[107],"collection.":[109],"Although":[110],"our":[111],"preliminary":[112],"results":[113],"demonstrate":[114],"tables":[116],"numerical":[118],"do":[120],"not":[121],"provide":[122],"quantifiable":[123],"value":[124],"document\u2019s":[127],"content,":[128],"stage":[130],"future":[132],"work":[133],"measuring":[135],"importance":[137],"large":[143],"corpus":[144],"has":[145],"been":[146],"set.":[147]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
