{"id":"https://openalex.org/W2015288652","doi":"https://doi.org/10.1145/1138379.1138380","title":"Automatic expansion of domain-specific lexicons by term categorization","display_name":"Automatic expansion of domain-specific lexicons by term categorization","publication_year":2006,"publication_date":"2006-05-01","ids":{"openalex":"https://openalex.org/W2015288652","doi":"https://doi.org/10.1145/1138379.1138380","mag":"2015288652"},"language":"en","primary_location":{"id":"doi:10.1145/1138379.1138380","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1138379.1138380","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066530968","display_name":"Henri Avancini","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107558","display_name":"Consorzio Pisa Ricerche","ror":"https://ror.org/01t0n3b84","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210107558"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Henri Avancini","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche, Pisa, Italy","institution_ids":["https://openalex.org/I4210107558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073069826","display_name":"Alberto Lavelli","orcid":"https://orcid.org/0000-0002-7175-6804"},"institutions":[{"id":"https://openalex.org/I4210111210","display_name":"Construction Technologies Institute","ror":"https://ror.org/0221agg28","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210111210","https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alberto Lavelli","raw_affiliation_strings":["ITC-irst, Povo (TN), Italy"],"affiliations":[{"raw_affiliation_string":"ITC-irst, Povo (TN), Italy","institution_ids":["https://openalex.org/I4210111210"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063975186","display_name":"Fabrizio Sebastiani","orcid":"https://orcid.org/0000-0003-4221-6427"},"institutions":[{"id":"https://openalex.org/I4210107558","display_name":"Consorzio Pisa Ricerche","ror":"https://ror.org/01t0n3b84","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210107558"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabrizio Sebastiani","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche, Pisa, Italy","institution_ids":["https://openalex.org/I4210107558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044199881","display_name":"Roberto Zanoli","orcid":"https://orcid.org/0000-0003-0870-0872"},"institutions":[{"id":"https://openalex.org/I4210111210","display_name":"Construction Technologies Institute","ror":"https://ror.org/0221agg28","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210111210","https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Roberto Zanoli","raw_affiliation_strings":["ITC-irst, Povo (TN), Italy"],"affiliations":[{"raw_affiliation_string":"ITC-irst, Povo (TN), Italy","institution_ids":["https://openalex.org/I4210111210"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5066530968"],"corresponding_institution_ids":["https://openalex.org/I4210107558"],"apc_list":null,"apc_paid":null,"fwci":1.8608,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.8710993,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"3","issue":"1","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.8546577095985413},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8019567131996155},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7196361422538757},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6890618801116943},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6614192724227905},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.6580684185028076},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5944910049438477},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.505969226360321},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4918344020843506},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4863587021827698},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4860551655292511},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4837491512298584},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.46801719069480896},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36392128467559814},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1099814772605896}],"concepts":[{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.8546577095985413},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019567131996155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7196361422538757},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6890618801116943},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6614192724227905},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.6580684185028076},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5944910049438477},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.505969226360321},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4918344020843506},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4863587021827698},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4860551655292511},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4837491512298584},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.46801719069480896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36392128467559814},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1099814772605896},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/1138379.1138380","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1138379.1138380","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.385.4673","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.385.4673","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nmis.isti.cnr.it/sebastiani/Publications/TSLP06.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.79.6348","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.6348","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://tcc.itc.it/people/lavelli/papers/TSLP06.pdf","raw_type":"text"},{"id":"pmh:oai:dnet:people______::4c69938fd83a8cb6c9f430d92ab75faa","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Other"}],"best_oa_location":{"id":"pmh:oai:dnet:people______::4c69938fd83a8cb6c9f430d92ab75faa","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Other"},"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W22702538","https://openalex.org/W134702066","https://openalex.org/W151200788","https://openalex.org/W191422183","https://openalex.org/W1502749598","https://openalex.org/W1515865104","https://openalex.org/W1562842039","https://openalex.org/W1567659444","https://openalex.org/W1572870503","https://openalex.org/W1576520375","https://openalex.org/W1586373272","https://openalex.org/W1588490261","https://openalex.org/W1590686500","https://openalex.org/W1604938182","https://openalex.org/W1940278502","https://openalex.org/W1977182536","https://openalex.org/W1981617416","https://openalex.org/W1987996059","https://openalex.org/W2009051249","https://openalex.org/W2014706780","https://openalex.org/W2016001305","https://openalex.org/W2027819720","https://openalex.org/W2038721957","https://openalex.org/W2040004971","https://openalex.org/W2040424159","https://openalex.org/W2047959359","https://openalex.org/W2052182675","https://openalex.org/W2052690453","https://openalex.org/W2053463056","https://openalex.org/W2076002267","https://openalex.org/W2107668593","https://openalex.org/W2111989294","https://openalex.org/W2118020653","https://openalex.org/W2120255974","https://openalex.org/W2121300346","https://openalex.org/W2133108446","https://openalex.org/W2149671658","https://openalex.org/W2150102617","https://openalex.org/W2151375725","https://openalex.org/W2157886637","https://openalex.org/W2160765088","https://openalex.org/W2161103800","https://openalex.org/W2161669948","https://openalex.org/W2163953154","https://openalex.org/W2166776180","https://openalex.org/W2170670430","https://openalex.org/W2172142456","https://openalex.org/W2324635907","https://openalex.org/W2435251607","https://openalex.org/W2439017901","https://openalex.org/W2483327705","https://openalex.org/W2560674852","https://openalex.org/W2608239929","https://openalex.org/W2788874293","https://openalex.org/W2997757304","https://openalex.org/W4205171160","https://openalex.org/W4245503759","https://openalex.org/W4299527668","https://openalex.org/W6634442568"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2390857744","https://openalex.org/W2133651098","https://openalex.org/W2390698788","https://openalex.org/W2035261173","https://openalex.org/W2150617187","https://openalex.org/W2138922887","https://openalex.org/W2383063829","https://openalex.org/W2155449793","https://openalex.org/W2082678934"],"abstract_inverted_index":{"We":[0,77,126,148],"discuss":[1],"an":[2,40],"approach":[3,55,78],"to":[4,14,71,99],"the":[5,15,63,92,128,150,173,176],"automatic":[6],"expansion":[7],"of":[8,17,36,65,75,114,124,130,133,152,162],"domain-specific":[9,163],"lexicons":[10,94],",":[11,39,60],"that":[12],"is,":[13],"problem":[16,84],"extending,":[18],"for":[19,143,184],"each":[20],"c":[21,30,33],"i":[22,44,51],"in":[23,85,104,111,121,135,175],"a":[24,47,72,81,112,122,131,139,158,166],"predefined":[25,73],"set":[26,74,161],"C":[27],"=":[28],"{":[29],"1":[31,52,180],",\u2026,":[32],"m":[34],"}":[35],"semantic":[37],"domains":[38],"initial":[41,93],"lexicon":[42,49],"L":[43,50],"0":[45],"into":[46],"larger":[48],".":[53],"Our":[54,168],"relies":[56],"on":[57],"term":[58,87,146],"categorization":[59,102],"defined":[61],"as":[62,80,95,109,119,165,181],"task":[64],"labeling":[66],"previously":[67],"unlabeled":[68],"terms":[69,118],"according":[70],"domains.":[76],"this":[79],"supervised":[82],"learning":[83,141],"which":[86,105,136],"classifiers":[88],"are":[89,107,170],"built":[90],"using":[91,156,172],"training":[96,144],"data.":[97],"Dually":[98],"classic":[100],"text":[101],"tasks":[103],"documents":[106,174],"represented":[108],"vectors":[110,120],"space":[113,123],"terms,":[115],"we":[116,137],"represent":[117],"documents.":[125],"present":[127],"results":[129],"number":[132],"experiments":[134,169],"use":[138],"boosting-based":[140],"device":[142],"our":[145,153,185],"classifiers.":[147],"test":[149],"effectiveness":[151],"method":[154],"by":[155],"WordNetDomains,":[157],"well-known":[159],"large":[160],"lexicons,":[164],"benchmark.":[167],"performed":[171],"Reuters":[177],"Corpus":[178],"Volume":[179],"implicit":[182],"representations":[183],"terms.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
