{"id":"https://openalex.org/W2169772276","doi":"https://doi.org/10.1145/1081870.1081906","title":"Improving discriminative sequential learning with rare--but--important associations","display_name":"Improving discriminative sequential learning with rare--but--important associations","publication_year":2005,"publication_date":"2005-08-21","ids":{"openalex":"https://openalex.org/W2169772276","doi":"https://doi.org/10.1145/1081870.1081906","mag":"2169772276"},"language":"en","primary_location":{"id":"doi:10.1145/1081870.1081906","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1081870.1081906","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012773199","display_name":"Xuan-Hieu Phan","orcid":"https://orcid.org/0000-0002-7640-9190"},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xuan-Hieu Phan","raw_affiliation_strings":["Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan","institution_ids":["https://openalex.org/I177738480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077641909","display_name":"Le-Minh Nguyen","orcid":"https://orcid.org/0000-0002-2265-1010"},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Le-Minh Nguyen","raw_affiliation_strings":["Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan","institution_ids":["https://openalex.org/I177738480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113459806","display_name":"Tu-Bao Ho","orcid":null},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tu-Bao Ho","raw_affiliation_strings":["Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Japan Advanced Inst. of Science &amp; Technology, Nomi, Ishikawa, Japan","institution_ids":["https://openalex.org/I177738480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112034357","display_name":"Susumu Horiguchi","orcid":null},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Susumu Horiguchi","raw_affiliation_strings":["Tohoku University, Sendai, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tohoku University, Sendai, Japan","institution_ids":["https://openalex.org/I201537933"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9189,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.82586791,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"304","last_page":"313"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crfs","display_name":"CRFS","score":0.9341189861297607},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7820907831192017},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7796049118041992},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.7162312269210815},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6574890613555908},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6437519192695618},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5520933270454407},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.4984731674194336},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.47757580876350403},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4729849100112915},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.4687844514846802},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3678323030471802},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33684831857681274},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.16509771347045898}],"concepts":[{"id":"https://openalex.org/C2775953691","wikidata":"https://www.wikidata.org/wiki/Q5013874","display_name":"CRFS","level":3,"score":0.9341189861297607},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7820907831192017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7796049118041992},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.7162312269210815},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6574890613555908},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6437519192695618},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5520933270454407},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.4984731674194336},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.47757580876350403},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4729849100112915},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.4687844514846802},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3678323030471802},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33684831857681274},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.16509771347045898},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1081870.1081906","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1081870.1081906","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.380.8139","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.380.8139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.jaist.ac.jp/~bao/papers/N166.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.573.665","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.573.665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://yaroslavvb.com/papers/phan-improving.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7699999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1484413656","https://openalex.org/W1506285740","https://openalex.org/W1592796124","https://openalex.org/W1773803948","https://openalex.org/W1934019294","https://openalex.org/W1969744517","https://openalex.org/W1988790447","https://openalex.org/W1988995507","https://openalex.org/W2008652694","https://openalex.org/W2034691490","https://openalex.org/W2034797903","https://openalex.org/W2036516910","https://openalex.org/W2051434435","https://openalex.org/W2064853889","https://openalex.org/W2095844239","https://openalex.org/W2096175520","https://openalex.org/W2102667697","https://openalex.org/W2107640784","https://openalex.org/W2120247405","https://openalex.org/W2124189704","https://openalex.org/W2125838338","https://openalex.org/W2139193890","https://openalex.org/W2147880316","https://openalex.org/W2154642793","https://openalex.org/W2156515921","https://openalex.org/W2160842254","https://openalex.org/W2162186970","https://openalex.org/W2163844356","https://openalex.org/W2167681385","https://openalex.org/W4252403066","https://openalex.org/W6628750762","https://openalex.org/W6637852806"],"related_works":["https://openalex.org/W1964783010","https://openalex.org/W4250494529","https://openalex.org/W2399696375","https://openalex.org/W2061834489","https://openalex.org/W3088215229","https://openalex.org/W182104056","https://openalex.org/W45206245","https://openalex.org/W50079190","https://openalex.org/W11196620","https://openalex.org/W2062502130"],"abstract_inverted_index":{"Discriminative":[0],"sequential":[1],"learning":[2,144,152],"models":[3,102],"like":[4],"Conditional":[5],"Random":[6],"Fields":[7],"(CRFs)":[8],"have":[9,43],"achieved":[10],"significant":[11,122],"success":[12],"in":[13,64,91,124],"several":[14,40],"areas":[15],"such":[16,78],"as":[17],"natural":[18],"language":[19],"processing":[20],"or":[21],"information":[22],"extraction.":[23],"Their":[24],"key":[25],"advantage":[26],"is":[27],"the":[28,48,65,92,129],"ability":[29],"to":[30,103,128,150],"capture":[31],"various":[32],"non--independent":[33],"and":[34,61,84,114,142,156],"overlapping":[35],"features":[36],"of":[37,88,110],"inputs.":[38],"However,":[39],"unexpected":[41],"pitfalls":[42],"a":[44,71,121,136],"negative":[45],"influence":[46],"on":[47],"model's":[49],"performance;":[50],"these":[51,101],"mainly":[52],"come":[53],"from":[54,160],"an":[55,147],"imbalance":[56],"among":[57],"classes/labels,":[58],"irregular":[59],"phenomena,":[60],"potential":[62,137],"ambiguity":[63],"training":[66,93],"data.":[67,94],"This":[68],"paper":[69],"presents":[70],"data--driven":[72],"approach":[73,133],"that":[74],"can":[75],"deal":[76,104],"with":[77,105,154],"hard--to--predict":[79],"data":[80],"instances":[81],"by":[82,145],"discovering":[83],"emphasizing":[85],"rare--but--important":[86],"associations":[87,96],"statistics":[89],"hidden":[90],"Mined":[95],"are":[97],"then":[98],"incorporated":[99],"into":[100],"difficult":[106],"examples.":[107],"Experimental":[108],"results":[109],"English":[111],"phrase":[112],"chunking":[113],"named":[115],"entity":[116],"recognition":[117],"using":[118],"CRFs":[119],"show":[120],"improvement":[123],"accuracy.":[125],"In":[126],"addition":[127],"technical":[130],"perspective,":[131],"our":[132],"also":[134],"highlights":[135],"connection":[138],"between":[139],"association":[140],"mining":[141],"statistical":[143],"offering":[146],"alternative":[148],"strategy":[149],"enhance":[151],"performance":[153],"interesting":[155],"useful":[157],"patterns":[158],"discovered":[159],"large":[161],"dataset.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
