{"id":"https://openalex.org/W2018593249","doi":"https://doi.org/10.1145/1143997.1144308","title":"Characterizing large text corpora using a maximum variation sampling genetic algorithm","display_name":"Characterizing large text corpora using a maximum variation sampling genetic algorithm","publication_year":2006,"publication_date":"2006-07-08","ids":{"openalex":"https://openalex.org/W2018593249","doi":"https://doi.org/10.1145/1143997.1144308","mag":"2018593249"},"language":"en","primary_location":{"id":"doi:10.1145/1143997.1144308","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1143997.1144308","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th annual conference on Genetic and evolutionary computation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021986280","display_name":"Robert M. Patton","orcid":"https://orcid.org/0000-0002-8101-0571"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Robert M. Patton","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, TN","Oak Ridge National Laboratory, Oak Ridge, TN#TAB#"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN","institution_ids":["https://openalex.org/I1289243028"]},{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN#TAB#","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000241304","display_name":"Thomas E. Potok","orcid":"https://orcid.org/0000-0001-6687-3435"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas E. Potok","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, TN","Oak Ridge National Laboratory, Oak Ridge, TN#TAB#"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN","institution_ids":["https://openalex.org/I1289243028"]},{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN#TAB#","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5021986280"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.9315,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80480703,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1877","last_page":"1878"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.961899995803833,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.961899995803833,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9402999877929688,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9332000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7838300466537476},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6158071756362915},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5711442232131958},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.5657770037651062},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5311122536659241},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5212181806564331},{"id":"https://openalex.org/keywords/genetic-algorithm","display_name":"Genetic algorithm","score":0.520384669303894},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5193542242050171},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4759005010128021},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4029754400253296},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3628402352333069},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33583056926727295},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2759159207344055},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.12118831276893616}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7838300466537476},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6158071756362915},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5711442232131958},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.5657770037651062},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5311122536659241},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5212181806564331},{"id":"https://openalex.org/C8880873","wikidata":"https://www.wikidata.org/wiki/Q187787","display_name":"Genetic algorithm","level":2,"score":0.520384669303894},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5193542242050171},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4759005010128021},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4029754400253296},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3628402352333069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33583056926727295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2759159207344055},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.12118831276893616},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1143997.1144308","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1143997.1144308","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th annual conference on Genetic and evolutionary computation","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.109.786","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.109.786","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://aser.ornl.gov/publications/GECO06.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.421.1195","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.421.1195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.bham.ac.uk/~wbl/biblio/gecco2006/docs/p1877.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6700000166893005,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1979700756","https://openalex.org/W2021569364","https://openalex.org/W2156698647"],"related_works":["https://openalex.org/W2386430105","https://openalex.org/W2356521405","https://openalex.org/W2038534795","https://openalex.org/W2384358604","https://openalex.org/W1567829292","https://openalex.org/W3001063351","https://openalex.org/W3196905815","https://openalex.org/W2032233321","https://openalex.org/W2351370765","https://openalex.org/W4288033310"],"abstract_inverted_index":{"There":[0],"exists":[1],"an":[2],"enormous":[3],"amount":[4],"of":[5,12,19,27,61,73],"information":[6],"available":[7],"via":[8,85],"the":[9,17,34,53,71],"Internet.":[10],"Much":[11],"this":[13,89,96],"data":[14,101],"is":[15],"in":[16],"form":[18],"text-based":[20],"documents.":[21],"These":[22],"documents":[23,62,84],"cover":[24],"a":[25,43,58,74,79,104],"variety":[26],"topics":[28],"that":[29],"are":[30,42,115],"vitally":[31],"important":[32],"to":[33,55],"scientific,":[35],"business,":[36],"and":[37,48,94,117],"defense/security":[38],"communities.":[39],"Currently,":[40],"there":[41],"many":[44],"techniques":[45],"for":[46,77,82],"processing":[47],"analyzing":[49],"such":[50],"data.":[51],"However,":[52],"ability":[54],"quickly":[56],"characterize":[57],"large":[59],"set":[60],"still":[63],"proves":[64],"challenging.":[65],"Previous":[66],"work":[67],"has":[68],"successfully":[69],"demonstrated":[70],"use":[72],"genetic":[75],"algorithm":[76],"providing":[78],"representative":[80],"subset":[81],"text":[83],"adaptive":[86,110],"sampling.":[87],"In":[88],"work,":[90],"we":[91],"further":[92],"expand":[93],"explore":[95],"approach":[97],"on":[98],"much":[99],"larger":[100],"sets":[102],"using":[103],"parallel":[105],"Genetic":[106],"Algorithm":[107],"(GA)":[108],"with":[109],"parameter":[111],"control.":[112],"Experimental":[113],"results":[114],"presented":[116],"discussed.":[118]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
