{"id":"https://openalex.org/W2977466972","doi":"https://doi.org/10.18653/v1/d19-5556","title":"Lexical Features Are More Vulnerable, Syntactic Features Have More Predictive Power","display_name":"Lexical Features Are More Vulnerable, Syntactic Features Have More Predictive Power","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2977466972","doi":"https://doi.org/10.18653/v1/d19-5556","mag":"2977466972"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d19-5556","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5556","pdf_url":"https://www.aclweb.org/anthology/D19-5556.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D19-5556.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050825493","display_name":"Jekaterina Novikova","orcid":"https://orcid.org/0000-0003-4754-6126"},"institutions":[{"id":"https://openalex.org/I1301653859","display_name":"nLIGHT (United States)","ror":"https://ror.org/01se7j361","country_code":"US","type":"company","lineage":["https://openalex.org/I1301653859"]},{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Jekaterina Novikova","raw_affiliation_strings":["Winterlight Labs, {jekaterina,","Heriot-Watt University, Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Winterlight Labs, {jekaterina,","institution_ids":["https://openalex.org/I1301653859"]},{"raw_affiliation_string":"Heriot-Watt University, Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I32062511"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010008842","display_name":"Aparna Balagopalan","orcid":"https://orcid.org/0000-0003-1621-9536"},"institutions":[{"id":"https://openalex.org/I1301653859","display_name":"nLIGHT (United States)","ror":"https://ror.org/01se7j361","country_code":"US","type":"company","lineage":["https://openalex.org/I1301653859"]},{"id":"https://openalex.org/I78650965","display_name":"TU Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Aparna Balagopalan","raw_affiliation_strings":["Winterlight Labs, {jekaterina,","TU Dresden, Dresden, Germany"],"affiliations":[{"raw_affiliation_string":"Winterlight Labs, {jekaterina,","institution_ids":["https://openalex.org/I1301653859"]},{"raw_affiliation_string":"TU Dresden, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019277108","display_name":"Ksenia Shkaruta","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I2800444561","display_name":"Atlanta Technical College","ror":"https://ror.org/01s3vfp47","country_code":"US","type":"education","lineage":["https://openalex.org/I2800444561"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ksenia Shkaruta","raw_affiliation_strings":["Georgia Tech,","Georgia Institute of Technology, Atlanta, United States"],"affiliations":[{"raw_affiliation_string":"Georgia Tech,","institution_ids":["https://openalex.org/I2800444561","https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056256317","display_name":"Frank Rudzicz","orcid":"https://orcid.org/0000-0002-1139-3423"},"institutions":[{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]},{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I1301653859","display_name":"nLIGHT (United States)","ror":"https://ror.org/01se7j361","country_code":"US","type":"company","lineage":["https://openalex.org/I1301653859"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Frank Rudzicz","raw_affiliation_strings":["University of Toronto; Vector Institute for Artificial Intelligence,","Winterlight Labs, {jekaterina,","University of Toronto, Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto; Vector Institute for Artificial Intelligence,","institution_ids":["https://openalex.org/I4210127509"]},{"raw_affiliation_string":"Winterlight Labs, {jekaterina,","institution_ids":["https://openalex.org/I1301653859"]},{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050825493"],"corresponding_institution_ids":["https://openalex.org/I1301653859","https://openalex.org/I32062511"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.11601041,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"431","last_page":"443"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7611364722251892},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.7131776809692383},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.6940362453460693},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6654284000396729},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6268987655639648},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6064285635948181},{"id":"https://openalex.org/keywords/predictive-power","display_name":"Predictive power","score":0.5160738825798035},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.4357268214225769},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.38704806566238403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7611364722251892},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.7131776809692383},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.6940362453460693},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6654284000396729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6268987655639648},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6064285635948181},{"id":"https://openalex.org/C2778136018","wikidata":"https://www.wikidata.org/wiki/Q10350689","display_name":"Predictive power","level":2,"score":0.5160738825798035},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.4357268214225769},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.38704806566238403},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/d19-5556","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5556","pdf_url":"https://www.aclweb.org/anthology/D19-5556.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1910.00065","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.00065","pdf_url":"https://arxiv.org/pdf/1910.00065","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2977466972","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1910.00065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1910.00065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1910.00065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/d19-5556","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5556","pdf_url":"https://www.aclweb.org/anthology/D19-5556.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.75,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2977466972.pdf","grobid_xml":"https://content.openalex.org/works/W2977466972.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W389331340","https://openalex.org/W654197657","https://openalex.org/W1524429405","https://openalex.org/W1606556278","https://openalex.org/W1965948809","https://openalex.org/W1971020201","https://openalex.org/W1972636262","https://openalex.org/W1991248487","https://openalex.org/W1994871153","https://openalex.org/W2007231077","https://openalex.org/W2019904950","https://openalex.org/W2027790310","https://openalex.org/W2054870958","https://openalex.org/W2063042856","https://openalex.org/W2071516704","https://openalex.org/W2074037951","https://openalex.org/W2084046180","https://openalex.org/W2085906926","https://openalex.org/W2090655260","https://openalex.org/W2094373498","https://openalex.org/W2098697817","https://openalex.org/W2101234009","https://openalex.org/W2103118550","https://openalex.org/W2113459411","https://openalex.org/W2113646895","https://openalex.org/W2117373735","https://openalex.org/W2126895642","https://openalex.org/W2136914353","https://openalex.org/W2148143831","https://openalex.org/W2153182733","https://openalex.org/W2158149850","https://openalex.org/W2163793419","https://openalex.org/W2166706824","https://openalex.org/W2171967745","https://openalex.org/W2250883471","https://openalex.org/W2251658484","https://openalex.org/W2293324445","https://openalex.org/W2383013545","https://openalex.org/W2398854657","https://openalex.org/W2512302303","https://openalex.org/W2741424578","https://openalex.org/W2748199382","https://openalex.org/W2773070064","https://openalex.org/W2786339580","https://openalex.org/W2886532121","https://openalex.org/W2891407789","https://openalex.org/W2902673180","https://openalex.org/W2914397182","https://openalex.org/W2962718684","https://openalex.org/W2963018534","https://openalex.org/W3105484828"],"related_works":["https://openalex.org/W2985840584","https://openalex.org/W2252136171","https://openalex.org/W2091047936","https://openalex.org/W2964110027","https://openalex.org/W1758944695","https://openalex.org/W2992470167","https://openalex.org/W2066615580","https://openalex.org/W2981039467","https://openalex.org/W2405052758","https://openalex.org/W2972351548","https://openalex.org/W3023706255","https://openalex.org/W2167679664","https://openalex.org/W1596802872","https://openalex.org/W2251104295","https://openalex.org/W2952563049","https://openalex.org/W2404246799","https://openalex.org/W2250652819","https://openalex.org/W182940136","https://openalex.org/W3164693096","https://openalex.org/W2096315187"],"abstract_inverted_index":{"Understanding":[0],"the":[1,41,59,67,116],"vulnerability":[2,50],"of":[3,24,51,61,69,71,78,103,118,136,141],"linguistic":[4],"features":[5,52,85,105],"extracted":[6],"from":[7,55],"noisy":[8],"text":[9,17,47,79,90],"is":[10,53],"important":[11],"for":[12,21],"both":[13,142],"developing":[14],"better":[15],"health":[16],"classification":[18,111],"models":[19],"and":[20,65,138,144],"interpreting":[22],"vulnerabilities":[23],"natural":[25],"language":[26,35],"models.":[27],"In":[28],"this":[29],"paper,":[30],"we":[31,96],"investigate":[32],"how":[33],"generic":[34],"characteristics,":[36],"such":[37],"as":[38,75],"syntax":[39],"or":[40],"lexicon,":[42],"are":[43,86,124],"impacted":[44],"by":[45],"artificial":[46],"alterations.":[48],"The":[49],"analysed":[54],"two":[56],"perspectives:":[57],"(1)":[58],"level":[60,68],"feature":[62,72],"value":[63],"change,":[64],"(2)":[66],"change":[70],"predictive":[73],"power":[74],"a":[76,107],"result":[77],"modifications.":[80],"We":[81],"show":[82],"that":[83,99],"lexical":[84,121,137],"more":[87],"sensitive":[88],"to":[89,115,120],"modifications":[91],"than":[92],"syntactic":[93,104,139],"ones.":[94],"However,":[95],"also":[97],"demonstrate":[98],"these":[100],"smaller":[101],"changes":[102,119],"have":[106],"stronger":[108],"influence":[109],"on":[110],"performance":[112],"downstream,":[113],"compared":[114],"impact":[117],"features.":[122],"Results":[123],"validated":[125],"across":[126],"three":[127],"datasets":[128],"representing":[129],"different":[130,134],"text-classification":[131],"tasks,":[132],"with":[133],"levels":[135],"complexity":[140],"conversational":[143],"written":[145],"language.":[146]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-21T08:13:44.787528","created_date":"2022-07-28T00:00:00"}
