{"id":"https://openalex.org/W4377236366","doi":"https://doi.org/10.1145/3581807.3581879","title":"RGFGM-LXMERT-An Improve Architecture Based On LXMERT","display_name":"RGFGM-LXMERT-An Improve Architecture Based On LXMERT","publication_year":2022,"publication_date":"2022-11-17","ids":{"openalex":"https://openalex.org/W4377236366","doi":"https://doi.org/10.1145/3581807.3581879"},"language":"en","primary_location":{"id":"doi:10.1145/3581807.3581879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581807.3581879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 11th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103177479","display_name":"Renjie Yu","orcid":"https://orcid.org/0009-0004-6942-1580"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Renjie Yu","raw_affiliation_strings":["Department of Computer and Information Technology, Guangxi University Xingjian College of Sciences and Liberal Arts, China"],"raw_orcid":"https://orcid.org/0009-0004-6942-1580","affiliations":[{"raw_affiliation_string":"Department of Computer and Information Technology, Guangxi University Xingjian College of Sciences and Liberal Arts, China","institution_ids":["https://openalex.org/I150807315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5103177479"],"corresponding_institution_ids":["https://openalex.org/I150807315"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17271365,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"491","last_page":"495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9753999710083008,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.823357105255127},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6214554309844971},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6176373958587646},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6124923229217529},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5424753427505493},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5413458347320557},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5269960761070251},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49065911769866943},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4699811041355133},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4544749855995178},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.430433988571167},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.41999849677085876},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4148266315460205},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09014284610748291}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.823357105255127},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6214554309844971},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6176373958587646},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6124923229217529},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5424753427505493},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5413458347320557},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5269960761070251},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49065911769866943},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4699811041355133},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4544749855995178},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.430433988571167},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.41999849677085876},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4148266315460205},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09014284610748291},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581807.3581879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581807.3581879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 11th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1512222092","https://openalex.org/W1945616565","https://openalex.org/W2022638422","https://openalex.org/W2277195237","https://openalex.org/W2529436507","https://openalex.org/W2567070169","https://openalex.org/W2777449390","https://openalex.org/W2896457183","https://openalex.org/W2950104027","https://openalex.org/W2969862959","https://openalex.org/W2996564870","https://openalex.org/W3010768098","https://openalex.org/W3152698349","https://openalex.org/W3185341429","https://openalex.org/W6637618735","https://openalex.org/W6639102338","https://openalex.org/W6640773114","https://openalex.org/W6755977528","https://openalex.org/W6765827731","https://openalex.org/W6767278793","https://openalex.org/W6768257583"],"related_works":["https://openalex.org/W2159052453","https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2502115930","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2734887215","https://openalex.org/W2803255133","https://openalex.org/W2909431601"],"abstract_inverted_index":{"LXMERT":[0,75,259],"(Learning":[1],"Cross-Modality":[2],"Encoder":[3],"Representations":[4],"from":[5],"Transformers)":[6],"is":[7,54,60,64,86,127,170,199,213],"a":[8,28,43,78,165],"two-stream":[9],"cross-modality":[10],"pre-trained":[11],"model":[12,40,52,76,106,115,135,156,163,185,189,204,226,229,233],"that":[13,252],"performs":[14,234],"well":[15],"in":[16,129,140,179],"different":[17,265],"downstream":[18],"tasks":[19],"which":[20,152,220],"contain":[21],"two":[22],"visual":[23,147],"question":[24],"answering":[25],"datasets":[26],"and":[27,35,62,82,102,113,146,158,228,260],"challenging":[29],"visual-reasoning":[30],"task":[31,239],"(i.e.,":[32,243],"VQA,":[33],"GQA,":[34],"NLVR).":[36],"But":[37],"the":[38,51,57,74,97,105,110,114,118,130,134,138,142,155,162,174,180,188,193,197,203,223,237,241,246,262,268],"large-scale":[39],"still":[41],"has":[42],"lot":[44,79],"of":[45,80,104,133,183,225,264],"room":[46],"for":[47,258],"progress.":[48],"That":[49],"is,":[50],"accuracy":[53,103,157],"very":[55],"low,":[56],"generalization":[58,100,159],"ability":[59,224],"weak,":[61],"it":[63],"easy":[65],"to":[66,90,95,172,201],"be":[67],"attacked":[68],"by":[69,107,136,177,215],"adversarial":[70,125],"attacks.":[71],"Furthermore,":[72],"training":[73,98,111,119,126,175],"takes":[77],"time":[81],"money,":[83],"so":[84],"there":[85],"an":[87],"urgent":[88],"need":[89],"improve.":[91],"Thus,":[92],"I":[93],"try":[94],"improve":[96,173],"speed,":[99],"ability,":[101],"enhancing":[108],"both":[109,141],"method":[112],"structure.":[116],"In":[117,161,245],"method,":[120],"FGM":[121],"(Fast":[122],"Gradient":[123],"Method)":[124],"introduced":[128],"finetune":[131],"phase":[132,182],"adding":[137],"disturbances":[139],"language":[143],"embedding":[144],"layer's":[145,150],"feature":[148],"linear":[149],"weights,":[151],"effectively":[153],"improves":[154,222],"ability.":[160],"structure,":[164,196],"residual":[166],"block":[167],"with":[168],"weight":[169],"used":[171],"speed":[176],"1.6%":[178],"pre-training":[181],"this":[184],"without":[186],"losing":[187],"performance.":[190,230],"Next,":[191],"t":[192],"most":[194],"important":[195],"Encoder,":[198],"redesigned":[200],"make":[202],"more":[205],"convergent.":[206],"The":[207,231],"Encoder's":[208],"FFN":[209],"(Feed-Forward":[210],"Neural":[211],"Network)":[212],"replaced":[214],"GLU":[216],"(Gated":[217],"Linear":[218],"Unit),":[219],"also":[221],"fitting":[227],"improved":[232],"better":[235],"on":[236,267],"VQA":[238],"than":[240],"benchmark":[242],"LXMERT).":[244],"end,":[247],"detailed":[248],"ablation":[249],"studies":[250],"prove":[251],"my":[253],"enhancement":[254],"strategies":[255],"are":[256],"effective":[257],"observe":[261],"effectiveness":[263],"measures":[266],"model.":[269]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
