{"id":"https://openalex.org/W3091049047","doi":"https://doi.org/10.1109/ijcnn48605.2020.9206645","title":"Effects of Architecture and Training on Embedding Geometry and Feature Discriminability in BERT","display_name":"Effects of Architecture and Training on Embedding Geometry and Feature Discriminability in BERT","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3091049047","doi":"https://doi.org/10.1109/ijcnn48605.2020.9206645","mag":"3091049047"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9206645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9206645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039991534","display_name":"Maksim Podkorytov","orcid":"https://orcid.org/0000-0003-3312-8038"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Maksim Podkorytov","raw_affiliation_strings":["Department of Computer Science, Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008250482","display_name":"Daniel Bi\u015b","orcid":"https://orcid.org/0000-0003-1347-0151"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Bis","raw_affiliation_strings":["Department of Computer Science, Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041378077","display_name":"Jinglun Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinglun Cai","raw_affiliation_strings":["Department of Mathematics, Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057689198","display_name":"Kobra Amirizirtol","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kobra Amirizirtol","raw_affiliation_strings":["Department of Computer Science, Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102867647","display_name":"Xiuwen Liu","orcid":"https://orcid.org/0000-0002-9320-3872"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiuwen Liu","raw_affiliation_strings":["Department of Computer Science, Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039991534"],"corresponding_institution_ids":["https://openalex.org/I103163165"],"apc_list":null,"apc_paid":null,"fwci":0.3977,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69083106,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7964444756507874},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7602715492248535},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7313333749771118},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6638394594192505},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5848351120948792},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5625935792922974},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5540364384651184},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5012006759643555},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.43802231550216675},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.43792325258255005},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4194512367248535},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4183153510093689},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39154884219169617},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36056140065193176},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08643832802772522}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7964444756507874},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7602715492248535},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7313333749771118},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6638394594192505},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5848351120948792},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5625935792922974},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5540364384651184},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5012006759643555},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.43802231550216675},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.43792325258255005},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4194512367248535},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4183153510093689},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39154884219169617},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36056140065193176},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08643832802772522},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9206645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9206645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W1522301498","https://openalex.org/W1614298861","https://openalex.org/W1987971958","https://openalex.org/W2064675550","https://openalex.org/W2101807845","https://openalex.org/W2113032826","https://openalex.org/W2153579005","https://openalex.org/W2247359815","https://openalex.org/W2250539671","https://openalex.org/W2294798173","https://openalex.org/W2402268235","https://openalex.org/W2525778437","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2910243263","https://openalex.org/W2923014074","https://openalex.org/W2925863688","https://openalex.org/W2937845937","https://openalex.org/W2946417913","https://openalex.org/W2946794439","https://openalex.org/W2952984539","https://openalex.org/W2962739339","https://openalex.org/W2963310665","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963716420","https://openalex.org/W2964121744","https://openalex.org/W2965373594","https://openalex.org/W2970820321","https://openalex.org/W2972324944","https://openalex.org/W2973827203","https://openalex.org/W2980282514","https://openalex.org/W2988217457","https://openalex.org/W2998704965","https://openalex.org/W3004346089","https://openalex.org/W4285719527","https://openalex.org/W4294170691","https://openalex.org/W4298201654","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6636510571","https://openalex.org/W6676918917","https://openalex.org/W6680532216","https://openalex.org/W6682691769","https://openalex.org/W6691049504","https://openalex.org/W6739901393","https://openalex.org/W6750615492","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6761260114","https://openalex.org/W6761672038","https://openalex.org/W6766293987","https://openalex.org/W6766673545","https://openalex.org/W6767665982"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W4390516098","https://openalex.org/W2037549926","https://openalex.org/W1950940422","https://openalex.org/W4283822356","https://openalex.org/W2129146436","https://openalex.org/W2032507829","https://openalex.org/W4287644835","https://openalex.org/W3092281475","https://openalex.org/W3098003361"],"abstract_inverted_index":{"Natural":[0],"language":[1],"processing":[2],"has":[3],"improved":[4,30,155],"substantially":[5],"in":[6,174],"the":[7,13,31,44,57,62,70,76,99,107,115,125,130,134,142,167,175],"last":[8],"few":[9],"years":[10],"due":[11,160],"to":[12,105,120,161],"increased":[14],"computational":[15],"power":[16],"and":[17,65,75,97,127,153],"availability":[18],"of":[19,47,72,78,114,124,133],"text":[20],"data.":[21,177],"Bidirectional":[22],"Encoder":[23],"Representations":[24],"from":[25],"Transformers":[26],"(BERT)":[27],"have":[28],"further":[29],"performance":[32,156],"by":[33,170],"using":[34],"an":[35,85],"auto-encoding":[36],"model":[37,166],"that":[38,93,141,165],"incorporates":[39],"larger":[40],"bidirectional":[41],"contexts.":[42],"However,":[43],"underlying":[45],"mechanisms":[46],"BERT":[48,63,143],"for":[49,81,151],"its":[50,66,73,79],"effectiveness":[51,77],"are":[52,94],"not":[53,146],"well":[54],"understood.":[55],"In":[56],"paper":[58],"we":[59],"investigate":[60],"how":[61],"architecture":[64],"pretraining":[67],"protocol":[68],"affect":[69,129],"geometry":[71],"embeddings":[74,126],"features":[80],"classification":[82],"tasks.":[83],"As":[84],"autoencoding":[86],"model,":[87],"during":[88],"pre-training,":[89],"it":[90],"produces":[91],"representations":[92,150],"context":[95],"dependent":[96],"at":[98],"same":[100],"time":[101],"must":[102],"be":[103,159],"able":[104],"\"reconstruct\"":[106],"original":[108],"input":[109],"sentences.":[110],"The":[111],"complex":[112],"interactions":[113],"two":[116],"via":[117],"transformers":[118],"lead":[119],"interesting":[121],"geometric":[122],"properties":[123],"subsequently":[128],"inherent":[131],"discriminability":[132],"resulting":[135],"representations.":[136],"Our":[137],"experimental":[138],"results":[139],"illustrate":[140],"models":[144],"do":[145],"produce":[147],"\"effective\"":[148],"contextualized":[149],"words":[152],"their":[154],"may":[157],"mainly":[158],"fine-tuning":[162],"or":[163],"classifiers":[164],"dependencies":[168],"explicitly":[169],"encoding":[171],"syntactic":[172],"patterns":[173],"training":[176]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
