{"id":"https://openalex.org/W2481642436","doi":"https://doi.org/10.18653/v1/w17-4401","title":"Boundary-based MWE segmentation with text partitioning","display_name":"Boundary-based MWE segmentation with text partitioning","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2481642436","doi":"https://doi.org/10.18653/v1/w17-4401","mag":"2481642436"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-4401","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4401","pdf_url":"https://www.aclweb.org/anthology/W17-4401.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd Workshop on Noisy User-generated Text","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-4401.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101587041","display_name":"Jake Ryland Williams","orcid":"https://orcid.org/0000-0002-7050-8403"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jake Williams","raw_affiliation_strings":["Drexel University 30 N. 33rd Street Philadelphia, PA 19104","Drexel University, Philadelphia, United States"],"affiliations":[{"raw_affiliation_string":"Drexel University 30 N. 33rd Street Philadelphia, PA 19104","institution_ids":["https://openalex.org/I72816309"]},{"raw_affiliation_string":"Drexel University, Philadelphia, United States","institution_ids":["https://openalex.org/I72816309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5101587041"],"corresponding_institution_ids":["https://openalex.org/I72816309"],"apc_list":null,"apc_paid":null,"fwci":0.45835928,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68087508,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8392336368560791},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5292070508003235},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.5272060036659241},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5259326100349426},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5246644020080566},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47905075550079346},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4742010235786438},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.45491623878479004},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.4426407217979431},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.42526906728744507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8392336368560791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5292070508003235},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.5272060036659241},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5259326100349426},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5246644020080566},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47905075550079346},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4742010235786438},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.45491623878479004},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.4426407217979431},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.42526906728744507},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w17-4401","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4401","pdf_url":"https://www.aclweb.org/anthology/W17-4401.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd Workshop on Noisy User-generated Text","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1608.02025","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1608.02025","pdf_url":"https://arxiv.org/pdf/1608.02025","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2481642436","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1608.02025.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1608.02025","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1608.02025","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-4401","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4401","pdf_url":"https://www.aclweb.org/anthology/W17-4401.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd Workshop on Noisy User-generated Text","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6800000071525574,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310343","display_name":"Drexel University","ror":"https://ror.org/04bdffz58"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2481642436.pdf","grobid_xml":"https://content.openalex.org/works/W2481642436.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W630883834","https://openalex.org/W634820411","https://openalex.org/W638675188","https://openalex.org/W1498763386","https://openalex.org/W1593045043","https://openalex.org/W1595276309","https://openalex.org/W1623072288","https://openalex.org/W1965693266","https://openalex.org/W1973346430","https://openalex.org/W1974731480","https://openalex.org/W2036671379","https://openalex.org/W2042380438","https://openalex.org/W2081580037","https://openalex.org/W2101196694","https://openalex.org/W2132225982","https://openalex.org/W2155501171","https://openalex.org/W2155848762","https://openalex.org/W2158150553","https://openalex.org/W2167493906","https://openalex.org/W2170505850","https://openalex.org/W2224490803","https://openalex.org/W2250899493","https://openalex.org/W2251062999","https://openalex.org/W2251491468","https://openalex.org/W2316904474","https://openalex.org/W2467466606","https://openalex.org/W2477814733","https://openalex.org/W2490634156","https://openalex.org/W2511006972","https://openalex.org/W2511102264","https://openalex.org/W2514946965","https://openalex.org/W2566515444","https://openalex.org/W2575987980","https://openalex.org/W2726537443","https://openalex.org/W2953320089"],"related_works":["https://openalex.org/W2224490803","https://openalex.org/W3115512991","https://openalex.org/W3106416961","https://openalex.org/W3049650565","https://openalex.org/W3112439891","https://openalex.org/W2735125076","https://openalex.org/W91366081","https://openalex.org/W3213043232","https://openalex.org/W2080906443","https://openalex.org/W3185783300","https://openalex.org/W3098744621","https://openalex.org/W1989280373","https://openalex.org/W3098265177","https://openalex.org/W3103090120","https://openalex.org/W3004169553","https://openalex.org/W3122479087","https://openalex.org/W3170465488","https://openalex.org/W2964224278","https://openalex.org/W3186231457","https://openalex.org/W229004428"],"abstract_inverted_index":{"This":[0,39,119],"work":[1],"presents":[2],"a":[3,17,22,31,52,88,123],"fine-grained,":[4],"text-chunking":[5],"algorithm":[6],"designed":[7],"for":[8,33,75,131],"the":[9,34,101,139],"task":[10],"of":[11,25,36,46,69,141],"multiword":[12],"expressions":[13],"(MWEs)":[14],"segmentation.":[15],"As":[16],"lexical":[18],"class,":[19],"MWEs":[20],"include":[21],"wide":[23],"variety":[24],"idioms,":[26],"whose":[27],"automatic":[28],"identification":[29],"are":[30,136],"necessity":[32],"handling":[35],"colloquial":[37],"language.":[38],"algorithm's":[40],"core":[41],"novelty":[42],"is":[43],"its":[44,76],"use":[45],"non-word":[47],"tokens,":[48],"i.e.,":[49],"boundaries,":[50],"in":[51],"bottom-up":[53],"strategy.":[54],"Leveraging":[55],"boundaries":[56],"refines":[57],"token-level":[58],"information,":[59],"forging":[60],"high-level":[61],"performance":[62],"from":[63],"relatively":[64],"basic":[65],"data.":[66],"The":[67],"generality":[68],"this":[70],"model's":[71],"feature":[72],"space":[73],"allows":[74],"application":[77],"across":[78],"languages":[79,86],"and":[80,112],"domains.":[81],"Experiments":[82],"spanning":[83],"19":[84],"different":[85],"exhibit":[87],"broadly-applicable,":[89],"state-of-the-art":[90],"model.":[91],"Evaluation":[92],"against":[93],"recent":[94],"shared-task":[95],"data":[96],"places":[97],"text":[98],"partitioning":[99],"as":[100],"overall,":[102],"best":[103],"performing":[104],"MWE":[105,110],"segmentation":[106],"algorithm,":[107],"covering":[108],"all":[109],"classes":[111],"multiple":[113],"English":[114],"domains":[115],"(including":[116],"user-generated":[117],"text).":[118],"performance,":[120],"coupled":[121],"with":[122],"non-combinatorial,":[124],"fast-running":[125],"design,":[126],"produces":[127],"an":[128],"ideal":[129],"combination":[130],"implementations":[132],"at":[133],"scale,":[134],"which":[135],"facilitated":[137],"through":[138],"release":[140],"open-source":[142],"software.":[143]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
