{"id":"https://openalex.org/W2099386700","doi":"https://doi.org/10.1109/tai.2003.1250178","title":"A software infrastructure for research in textual data mining","display_name":"A software infrastructure for research in textual data mining","publication_year":2004,"publication_date":"2004-03-02","ids":{"openalex":"https://openalex.org/W2099386700","doi":"https://doi.org/10.1109/tai.2003.1250178","mag":"2099386700"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2003.1250178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2003.1250178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. 15th IEEE International Conference on Tools with Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029770342","display_name":"Lars E. Holzman","orcid":null},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"L.E. Holzman","raw_affiliation_strings":["Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","Department of Computer Science & Engineering, Lehigh University, USA"],"affiliations":[{"raw_affiliation_string":"Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]},{"raw_affiliation_string":"Department of Computer Science & Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089216940","display_name":"Todd A. Fisher","orcid":null},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"T.A. Fisher","raw_affiliation_strings":["Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","Department of Computer Science & Engineering, Lehigh University, USA"],"affiliations":[{"raw_affiliation_string":"Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]},{"raw_affiliation_string":"Department of Computer Science & Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090533858","display_name":"Leon M. Galitsky","orcid":null},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"L.M. Galitsky","raw_affiliation_strings":["Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","Department of Computer Science & Engineering, Lehigh University, USA"],"affiliations":[{"raw_affiliation_string":"Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]},{"raw_affiliation_string":"Department of Computer Science & Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068333496","display_name":"April Kontostathis","orcid":null},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A. Kontostathis","raw_affiliation_strings":["Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","Department of Computer Science & Engineering, Lehigh University, USA"],"affiliations":[{"raw_affiliation_string":"Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]},{"raw_affiliation_string":"Department of Computer Science & Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011634099","display_name":"William M. Pottenger","orcid":null},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W.M. Pottenger","raw_affiliation_strings":["Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","Department of Computer Science & Engineering, Lehigh University, USA"],"affiliations":[{"raw_affiliation_string":"Textual Data Mining Lab, Department of Computer Science and Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]},{"raw_affiliation_string":"Department of Computer Science & Engineering, Lehigh University, USA","institution_ids":["https://openalex.org/I186143895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5029770342"],"corresponding_institution_ids":["https://openalex.org/I186143895"],"apc_list":null,"apc_paid":null,"fwci":0.8075,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.85758086,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"112","last_page":"121"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7947685718536377},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6890876293182373},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6348896026611328},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4975791275501251},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.45365703105926514},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4146000146865845},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.36829277873039246},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3309790790081024}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7947685718536377},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6890876293182373},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6348896026611328},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4975791275501251},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.45365703105926514},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4146000146865845},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.36829277873039246},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3309790790081024},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tai.2003.1250178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2003.1250178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. 15th IEEE International Conference on Tools with Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.2.7326","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.2.7326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.lehigh.edu/~billp/pubs/ICTAI.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.67.4067","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.67.4067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://hddi.cse.lehigh.edu/docs/IJAITSubmission.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.68.2878","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.68.2878","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://hddi.cse.lehigh.edu/docs/AITools.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6399999856948853,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310365","display_name":"Lehigh University","ror":"https://ror.org/012afjb06"},{"id":"https://openalex.org/F4320332169","display_name":"Directorate for Computer and Information Science and Engineering","ror":"https://ror.org/025kzpk63"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W24844424","https://openalex.org/W41690797","https://openalex.org/W121027324","https://openalex.org/W174335598","https://openalex.org/W1502814178","https://openalex.org/W1580206684","https://openalex.org/W2081687495","https://openalex.org/W2117400858","https://openalex.org/W2123504579","https://openalex.org/W2147152072","https://openalex.org/W2157994885","https://openalex.org/W2466512847","https://openalex.org/W6604946001","https://openalex.org/W6630118212"],"related_works":["https://openalex.org/W2798121181","https://openalex.org/W2016805743","https://openalex.org/W4242592912","https://openalex.org/W435830328","https://openalex.org/W2087896742","https://openalex.org/W1989025965","https://openalex.org/W2328676785","https://openalex.org/W2322380964","https://openalex.org/W2587527225","https://openalex.org/W4237443457"],"abstract_inverted_index":{"Few":[0],"tools":[1,33,68],"exist":[2],"that":[3,53,76,139,148,177],"address":[4],"the":[5,10,94,121,141,158,183],"challenges":[6],"facing":[7],"researchers":[8,166],"in":[9,60,81,103,126],"textual":[11,48],"data":[12,49],"mining":[13,50,117,128],"(TDM)":[14],"field.":[15],"Some":[16],"are":[17,24,35],"too":[18],"specific":[19],"to":[20,65,73,79,167],"their":[21,171],"application,":[22],"or":[23],"prototypes":[25],"not":[26,36,150],"suitable":[27],"for":[28,112,123],"general":[29,32],"use.":[30],"More":[31],"often":[34],"capable":[37],"of":[38,42,85,96,100,108,137],"processing":[39,86],"large":[40],"volumes":[41],"data.":[43],"We":[44,143,154],"have":[45,149],"created":[46],"a":[47,61,82,90,135],"infrastructure":[51],"(TMI)":[52],"incorporates":[54],"both":[55],"existing":[56,161],"and":[57,69,98,169],"new":[58,67],"capabilities":[59],"reusable":[62],"framework":[63],"conductive":[64],"developing":[66],"components.":[70],"TMI":[71,109,159,179],"adheres":[72],"strict":[74],"guidelines":[75],"allow":[77],"it":[78,92],"run":[80],"wide":[83],"range":[84],"environments":[87],"-":[88],"as":[89],"result,":[91],"accommodates":[93],"volume":[95],"computing":[97],"diversity":[99],"research":[101,118],"occurring":[102],"TDM.":[104],"A":[105],"unique":[106],"capability":[107],"is":[110,180],"support":[111],"optimization.":[113],"This":[114],"facilitates":[115],"text":[116,127],"by":[119],"automating":[120],"search":[122],"optimal":[124],"parameters":[125],"algorithms.":[129],"In":[130],"this":[131],"article":[132],"we":[133],"describe":[134],"number":[136],"applications":[138],"use":[140],"TMI.":[142],"present":[144],"several":[145],"novel":[146],"results":[147],"been":[151],"published":[152],"elsewhere.":[153],"also":[155],"discuss":[156],"how":[157],"utilizes":[160],"machine-learning":[162],"libraries,":[163],"thereby":[164],"enabling":[165],"continue":[168],"extend":[170],"endeavors":[172],"with":[173],"minimal":[174],"effort.":[175],"Towards":[176],"end,":[178],"available":[181],"on":[182],"web":[184],"at":[185],"hddi.cse.lehigh.edu.":[186]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
