{"id":"https://openalex.org/W2977981067","doi":"https://doi.org/10.1109/ijcnn.2019.8852388","title":"Multi-task Learning with Bidirectional Language Models for Text Classification","display_name":"Multi-task Learning with Bidirectional Language Models for Text Classification","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2977981067","doi":"https://doi.org/10.1109/ijcnn.2019.8852388","mag":"2977981067"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2019.8852388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100417919","display_name":"Qi Yang","orcid":"https://orcid.org/0000-0002-5425-0932"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qi Yang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063402368","display_name":"Lin Shang","orcid":"https://orcid.org/0000-0003-1356-1942"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Shang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100417919"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":1.5402,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.87559123,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.8190984129905701},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8151422739028931},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.6867599487304688},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6494952440261841},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.588187038898468},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5445958375930786},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.499176025390625},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4791357219219208},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.47363558411598206},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.466729998588562},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.45442628860473633},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4234347343444824},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.20556971430778503},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07617554068565369}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.8190984129905701},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8151422739028931},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.6867599487304688},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6494952440261841},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.588187038898468},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5445958375930786},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.499176025390625},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4791357219219208},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.47363558411598206},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.466729998588562},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.45442628860473633},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4234347343444824},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.20556971430778503},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07617554068565369},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2019.8852388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1499999342","https://openalex.org/W1522301498","https://openalex.org/W1614298861","https://openalex.org/W1832693441","https://openalex.org/W1896424170","https://openalex.org/W2064675550","https://openalex.org/W2099471712","https://openalex.org/W2117130368","https://openalex.org/W2130942839","https://openalex.org/W2170240176","https://openalex.org/W2248051232","https://openalex.org/W2295072214","https://openalex.org/W2407776548","https://openalex.org/W2511131004","https://openalex.org/W2526425061","https://openalex.org/W2563877105","https://openalex.org/W2585635281","https://openalex.org/W2624871570","https://openalex.org/W2953127297","https://openalex.org/W2962739339","https://openalex.org/W2962897020","https://openalex.org/W2963012544","https://openalex.org/W2963153906","https://openalex.org/W2963249435","https://openalex.org/W2963424663","https://openalex.org/W2963706742","https://openalex.org/W2963842982","https://openalex.org/W2963877604","https://openalex.org/W2964121744","https://openalex.org/W2964289193","https://openalex.org/W2964294651","https://openalex.org/W2964319599","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6636510571","https://openalex.org/W6679436768","https://openalex.org/W6685053522","https://openalex.org/W6685145238","https://openalex.org/W6713582272","https://openalex.org/W6725448924","https://openalex.org/W6739365718"],"related_works":["https://openalex.org/W3047363187","https://openalex.org/W2268150819","https://openalex.org/W2237537322","https://openalex.org/W3119773509","https://openalex.org/W3177373753","https://openalex.org/W2950678851","https://openalex.org/W4301248618","https://openalex.org/W2165343651","https://openalex.org/W2242427765","https://openalex.org/W2075830955"],"abstract_inverted_index":{"Multi-task":[0],"learning":[1,92],"is":[2,40,138,164],"an":[3,104],"effective":[4],"approach":[5,193],"to":[6,44,107,112,116,122,127,140,157,167],"extract":[7,72,117],"task-invariant":[8,74],"features":[9,147,150],"by":[10],"leveraging":[11],"potential":[12],"information":[13,51],"among":[14],"related":[15],"tasks,":[16],"which":[17,179],"improves":[18],"the":[19,30,46,58,68,78,108,124,141,159,168,176],"performance":[20],"of":[21,181],"a":[22,56,86,131,153],"single":[23],"task.":[24],"Most":[25],"existing":[26],"work":[27],"simply":[28],"divides":[29],"whole":[31],"model":[32],"into":[33],"shared":[34,59,125,142],"and":[35,148,162],"private":[36,69,109],"spaces.":[37],"Unfortunately,":[38],"there":[39],"no":[41],"explicit":[42],"mechanism":[43],"prevent":[45],"two":[47],"spaces":[48],"from":[49,52],"merging":[50],"each":[53],"other.":[54],"As":[55],"result,":[57],"space":[60,70],"may":[61,71],"be":[62],"mixed":[63],"with":[64],"task-specific":[65,118,146],"features,":[66,130],"while":[67],"some":[73],"features.":[75,119],"To":[76],"alleviate":[77],"problem":[79],"mentioned,":[80],"in":[81,152],"this":[82],"paper,":[83],"we":[84,99],"propose":[85],"bidirectional":[87],"language":[88,101],"models":[89],"based":[90],"multi-task":[91],"method":[93],"for":[94],"text":[95,184],"classification.":[96],"More":[97],"specifically,":[98],"add":[100],"modelling":[102],"as":[103],"auxiliary":[105],"task":[106],"part,":[110],"aiming":[111],"enhance":[113],"its":[114],"ability":[115],"In":[120],"addition,":[121],"promote":[123],"part":[126],"learn":[128],"common":[129],"loss":[132],"constraint":[133],"via":[134],"uniform":[135],"label":[136],"distribution":[137],"introduced":[139],"part.":[143],"Finally,":[144],"put":[145],"taskinvariant":[149],"together":[151],"weighted":[154],"addition":[155],"way":[156],"form":[158],"final":[160],"representation,":[161],"it":[163],"then":[165],"fed":[166],"corresponding":[169],"softmax":[170],"layer.":[171],"We":[172],"do":[173],"experiments":[174],"on":[175],"FDU-MTL":[177],"dataset":[178],"consists":[180],"16":[182],"different":[183],"classification":[185],"tasks.":[186],"The":[187],"experimental":[188],"results":[189],"show":[190],"that":[191],"our":[192],"outperforms":[194],"other":[195],"typical":[196],"methods.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
