{"id":"https://openalex.org/W4390241396","doi":"https://doi.org/10.1109/tmm.2023.3347093","title":"LOIS: Looking Out of Instance Semantics for Visual Question Answering","display_name":"LOIS: Looking Out of Instance Semantics for Visual Question Answering","publication_year":2023,"publication_date":"2023-12-26","ids":{"openalex":"https://openalex.org/W4390241396","doi":"https://doi.org/10.1109/tmm.2023.3347093"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2023.3347093","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3347093","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://bura.brunel.ac.uk/bitstream/2438/28249/1/FullText.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100347374","display_name":"Siyu Zhang","orcid":"https://orcid.org/0000-0002-0001-0204"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Siyu Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-0001-0204","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102932116","display_name":"Yeming Chen","orcid":"https://orcid.org/0009-0005-5515-1943"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yeming Chen","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-5515-1943","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102966948","display_name":"Yaoru Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaoru Sun","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-2179-0713","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100321464","display_name":"Fang Wang","orcid":"https://orcid.org/0000-0003-1987-9150"},"institutions":[{"id":"https://openalex.org/I59433898","display_name":"Brunel University of London","ror":"https://ror.org/00dn4t376","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I59433898"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fang Wang","raw_affiliation_strings":["Department of Computer Science, Brunel University, Uxbridge, U.K"],"raw_orcid":"https://orcid.org/0000-0003-1987-9150","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Brunel University, Uxbridge, U.K","institution_ids":["https://openalex.org/I59433898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101667506","display_name":"Haibo Shi","orcid":"https://orcid.org/0000-0001-9976-6927"},"institutions":[{"id":"https://openalex.org/I181679659","display_name":"Shanghai University of Finance and Economics","ror":"https://ror.org/00wtvfq62","country_code":"CN","type":"education","lineage":["https://openalex.org/I181679659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibo Shi","raw_affiliation_strings":["School of Statistics and Management, Shanghai University of Finance and Economics, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-9976-6927","affiliations":[{"raw_affiliation_string":"School of Statistics and Management, Shanghai University of Finance and Economics, Shanghai, China","institution_ids":["https://openalex.org/I181679659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100324824","display_name":"Haoran Wang","orcid":"https://orcid.org/0000-0002-4622-0119"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoran Wang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-4622-0119","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100347374"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":1.2952,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83062306,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"6202","last_page":"6214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8928903341293335},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8115535974502563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.586513102054596},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5510618090629578},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5480031967163086},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5038246512413025},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4872359335422516},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4525071084499359},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.44111502170562744},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.19881278276443481},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.1503511369228363}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8928903341293335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8115535974502563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.586513102054596},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5510618090629578},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5480031967163086},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5038246512413025},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4872359335422516},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4525071084499359},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.44111502170562744},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19881278276443481},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.1503511369228363}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmm.2023.3347093","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3347093","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:bura.brunel.ac.uk:2438/28249","is_oa":true,"landing_page_url":"https://bura.brunel.ac.uk/handle/2438/28249","pdf_url":"http://bura.brunel.ac.uk/bitstream/2438/28249/1/FullText.pdf","source":{"id":"https://openalex.org/S4306401473","display_name":"Brunel University Research Archive (BURA) (Brunel University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59433898","host_organization_name":"Brunel University of London","host_organization_lineage":["https://openalex.org/I59433898"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:bura.brunel.ac.uk:2438/28249","is_oa":true,"landing_page_url":"https://bura.brunel.ac.uk/handle/2438/28249","pdf_url":"http://bura.brunel.ac.uk/bitstream/2438/28249/1/FullText.pdf","source":{"id":"https://openalex.org/S4306401473","display_name":"Brunel University Research Archive (BURA) (Brunel University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59433898","host_organization_name":"Brunel University of London","host_organization_lineage":["https://openalex.org/I59433898"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2718252163","display_name":null,"funder_award_id":"91748122","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390241396.pdf"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1933349210","https://openalex.org/W2142192571","https://openalex.org/W2529436507","https://openalex.org/W2560730294","https://openalex.org/W2745461083","https://openalex.org/W2807941860","https://openalex.org/W2887967003","https://openalex.org/W2894842749","https://openalex.org/W2896457183","https://openalex.org/W2896902935","https://openalex.org/W2905571389","https://openalex.org/W2916723116","https://openalex.org/W2949197413","https://openalex.org/W2963028801","https://openalex.org/W2963644680","https://openalex.org/W2963954913","https://openalex.org/W2964072591","https://openalex.org/W2964345214","https://openalex.org/W2966683369","https://openalex.org/W2968124245","https://openalex.org/W2970231061","https://openalex.org/W3004349648","https://openalex.org/W3005881764","https://openalex.org/W3014611590","https://openalex.org/W3026441479","https://openalex.org/W3033514332","https://openalex.org/W3034681942","https://openalex.org/W3034826836","https://openalex.org/W3035497460","https://openalex.org/W3036148046","https://openalex.org/W3037773948","https://openalex.org/W3043547428","https://openalex.org/W3087338569","https://openalex.org/W3087975588","https://openalex.org/W3090449556","https://openalex.org/W3100786684","https://openalex.org/W3168972675","https://openalex.org/W3173961205","https://openalex.org/W3185066916","https://openalex.org/W3187176672","https://openalex.org/W3195963939","https://openalex.org/W3198817488","https://openalex.org/W3207886649","https://openalex.org/W3216470601","https://openalex.org/W4200475325","https://openalex.org/W4210484952","https://openalex.org/W4225791300","https://openalex.org/W4312246181","https://openalex.org/W4382202719","https://openalex.org/W4385696173","https://openalex.org/W6634232107","https://openalex.org/W6728881024","https://openalex.org/W6755207826","https://openalex.org/W6766904570","https://openalex.org/W6767211374","https://openalex.org/W6767279747","https://openalex.org/W6775188310","https://openalex.org/W6784930956","https://openalex.org/W6789909235"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2964061310","https://openalex.org/W2231285690","https://openalex.org/W2963477107","https://openalex.org/W4390091918","https://openalex.org/W3093385053","https://openalex.org/W4389682534","https://openalex.org/W2553418567","https://openalex.org/W3045060014"],"abstract_inverted_index":{"Visual":[0],"question":[1],"answering":[2],"(VQA)":[3],"has":[4,200],"been":[5],"intensively":[6],"studied":[7],"as":[8],"a":[9,56,81,151],"multimodal":[10],"task,":[11],"requiring":[12],"efforts":[13],"to":[14,62,100,112,117,139,156],"bridge":[15],"vision":[16],"and":[17,64,134,159,167],"language":[18],"for":[19,30,59],"correct":[20,142],"answer":[21],"inference.":[22],"Recent":[23],"attempts":[24],"have":[25],"developed":[26],"various":[27],"attention-based":[28],"modules":[29],"solving":[31],"VQA":[32,60,193],"tasks.":[33],"However,":[34],"the":[35,67,119,141],"performance":[36,202],"of":[37,70,96,128],"model":[38,83,157,175],"inference":[39],"is":[40],"largely":[41],"bottlenecked":[42],"by":[43,123,182],"visual":[44,114,146,161,205],"semantic":[45,162],"comprehension.":[46],"Most":[47],"existing":[48],"detection":[49],"methods":[50],"rely":[51],"on":[52,184,190],"bounding":[53,86],"boxes,":[54],"remaining":[55],"serious":[57],"challenge":[58],"models":[61],"comprehend":[63],"correctly":[65],"infer":[66,140],"causal":[68],"nexus":[69],"contextual":[71],"object":[72],"semantics":[73],"in":[74,88,203],"images.":[75],"To":[76],"this":[77,89,102],"end,":[78],"we":[79,149],"propose":[80],"finer":[82],"framework":[84],"without":[85],"boxes":[87],"work,":[90],"termed":[91],"<italic":[92],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[93],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Looking":[94],"Out":[95],"Instance":[97],"Semantics":[98],"(LOIS)</i>":[99],"address":[101],"crucial":[103],"issue.":[104],"LOIS":[105],"can":[106,176],"achieve":[107],"more":[108],"fine-grained":[109],"feature":[110],"descriptions":[111],"generate":[113],"facts.":[115],"Furthermore,":[116],"overcome":[118],"label":[120],"ambiguity":[121],"caused":[122],"instance":[124,165],"masks,":[125],"two":[126],"types":[127],"relation":[129,153],"attention":[130,154,174],"modules:":[131],"1)":[132],"intra-modality":[133],"2)":[135],"inter-modality,":[136],"are":[137],"devised":[138],"answers":[143],"from":[144],"different":[145],"features.":[147],"Specifically,":[148],"implement":[150],"mutual":[152],"module":[155],"sophisticated":[158],"deeper":[160],"relations":[163],"between":[164],"objects":[166],"background":[168],"information.":[169],"In":[170],"addition,":[171],"our":[172,197],"proposed":[173,198],"further":[177],"analyze":[178],"salient":[179],"image":[180],"regions":[181],"focusing":[183],"important":[185],"word-related":[186],"questions.":[187],"Experimental":[188],"results":[189],"four":[191],"benchmark":[192],"datasets":[194],"prove":[195],"that":[196],"method":[199],"favorable":[201],"improving":[204],"reasoning":[206],"capability.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
