{"id":"https://openalex.org/W2793546384","doi":"https://doi.org/10.1109/cvpr.2018.00519","title":"Transparency by Design: Closing the Gap Between Performance and Interpretability in Visual Reasoning","display_name":"Transparency by Design: Closing the Gap Between Performance and Interpretability in Visual Reasoning","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2793546384","doi":"https://doi.org/10.1109/cvpr.2018.00519","mag":"2793546384"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr.2018.00519","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2018.00519","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1803.05268","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014156199","display_name":"David Mascharka","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"David Mascharka","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086303242","display_name":"Philip Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I38645685","display_name":"Spirit AeroSystems (United States)","ror":"https://ror.org/04mekkx88","country_code":"US","type":"company","lineage":["https://openalex.org/I38645685"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Tran","raw_affiliation_strings":["Planck Aerosystems"],"affiliations":[{"raw_affiliation_string":"Planck Aerosystems","institution_ids":["https://openalex.org/I38645685"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044026239","display_name":"Ryan Soklaski","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Soklaski","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004995502","display_name":"Arjun Majumdar","orcid":"https://orcid.org/0000-0002-9113-3583"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Majumdar","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014156199"],"corresponding_institution_ids":["https://openalex.org/I4210122954"],"apc_list":null,"apc_paid":null,"fwci":13.4019,"has_fulltext":false,"cited_by_count":205,"citation_normalized_percentile":{"value":0.98999069,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4942","last_page":"4950"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8713735938072205},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7943891286849976},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.7459004521369934},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.6562750339508057},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6417319774627686},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5549757480621338},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4521328806877136},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.45090287923812866},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.44637852907180786},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41906532645225525},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09380015730857849}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8713735938072205},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7943891286849976},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.7459004521369934},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.6562750339508057},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6417319774627686},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5549757480621338},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4521328806877136},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.45090287923812866},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.44637852907180786},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41906532645225525},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09380015730857849},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cvpr.2018.00519","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2018.00519","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1803.05268","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1803.05268","pdf_url":"https://arxiv.org/pdf/1803.05268","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1803.05268","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1803.05268","pdf_url":"https://arxiv.org/pdf/1803.05268","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1677182931","https://openalex.org/W1934890906","https://openalex.org/W2011301426","https://openalex.org/W2170881581","https://openalex.org/W2171810632","https://openalex.org/W2174492417","https://openalex.org/W2179022885","https://openalex.org/W2194775991","https://openalex.org/W2230472587","https://openalex.org/W2255577267","https://openalex.org/W2256357568","https://openalex.org/W2439568532","https://openalex.org/W2463565445","https://openalex.org/W2561715562","https://openalex.org/W2609476118","https://openalex.org/W2613526370","https://openalex.org/W2624614404","https://openalex.org/W2734498959","https://openalex.org/W2737766105","https://openalex.org/W2743547909","https://openalex.org/W2747768235","https://openalex.org/W2749983526","https://openalex.org/W2752333386","https://openalex.org/W2785578203","https://openalex.org/W2786209943","https://openalex.org/W2787119853","https://openalex.org/W2949650786","https://openalex.org/W2949888546","https://openalex.org/W2953212746","https://openalex.org/W2962716332","https://openalex.org/W2962731754","https://openalex.org/W2962749469","https://openalex.org/W2962858109","https://openalex.org/W2962933067","https://openalex.org/W2963143606","https://openalex.org/W2963150162","https://openalex.org/W2963191264","https://openalex.org/W2963223524","https://openalex.org/W2963224792","https://openalex.org/W2963321359","https://openalex.org/W2963383024","https://openalex.org/W2963468699","https://openalex.org/W2963495494","https://openalex.org/W2963651499","https://openalex.org/W2963656855","https://openalex.org/W2963840672","https://openalex.org/W2963907629","https://openalex.org/W2963954913","https://openalex.org/W2964121744","https://openalex.org/W3102564565","https://openalex.org/W4297749157","https://openalex.org/W4299408792","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6685520387","https://openalex.org/W6687483927","https://openalex.org/W6692244562","https://openalex.org/W6696085341","https://openalex.org/W6731265811","https://openalex.org/W6743651696","https://openalex.org/W6743913890","https://openalex.org/W6748581942"],"related_works":["https://openalex.org/W2113245685","https://openalex.org/W4226468307","https://openalex.org/W2142763871","https://openalex.org/W2122572865","https://openalex.org/W4390478989","https://openalex.org/W4385571345","https://openalex.org/W1538565162","https://openalex.org/W2389872472","https://openalex.org/W2963722138","https://openalex.org/W4307078079"],"abstract_inverted_index":{"Visual":[0],"question":[1],"answering":[2],"requires":[3],"high-order":[4],"reasoning":[5,36,55,71,87,107],"about":[6],"an":[7,30,65,110,122],"image,":[8],"which":[9],"is":[10,159],"a":[11,45,91,101,168,186],"fundamental":[12],"capability":[13],"needed":[14],"by":[15],"machine":[16],"systems":[17],"to":[18,28,125,161],"follow":[19],"complex":[20,53,106],"directives.":[21],"Recently,":[22],"modular":[23,39],"networks":[24,40],"have":[25],"been":[26],"shown":[27],"be":[29],"effective":[31,66],"framework":[32],"for":[33,68],"performing":[34,105],"visual":[35,54,86],"tasks.":[37],"While":[38],"were":[41],"initially":[42],"designed":[43],"with":[44],"degree":[46],"of":[47,93,104,117,131,147,171,195],"model":[48,102,158],"transparency,":[49],"their":[50],"performance":[51,79],"on":[52,149],"benchmarks":[56],"was":[57],"lacking.":[58],"Current":[59],"state-of-the-art":[60,85,145],"approaches":[61],"do":[62],"not":[63],"provide":[64],"mechanism":[67],"understanding":[69],"the":[70,78,118,127,132,150,178,192,196],"process.":[72],"In":[73],"this":[74],"paper,":[75],"we":[76,136,182],"close":[77],"gap":[80],"between":[81],"interpretable":[82],"models":[83],"and":[84,115,129],"methods.":[88],"We":[89,153],"propose":[90],"set":[92],"visual-reasoning":[94],"primitives":[95,140],"which,":[96],"when":[97,166],"composed,":[98],"manifest":[99],"as":[100],"capable":[103],"tasks":[108],"in":[109],"explicitly-interpretable":[111],"manner.":[112],"The":[113],"fidelity":[114],"interpretability":[116],"primitives'":[119],"outputs":[120],"enable":[121],"unparalleled":[123],"ability":[124],"diagnose":[126],"strengths":[128],"weaknesses":[130],"resulting":[133],"model.":[134],"Critically,":[135],"show":[137,155,183],"that":[138,156],"these":[139],"are":[141],"highly":[142],"performant,":[143],"achieving":[144],"accuracy":[146],"99.1%":[148],"CLEVR":[151],"dataset.":[152],"also":[154],"our":[157],"able":[160],"effectively":[162],"learn":[163],"generalized":[164],"representations":[165],"provided":[167],"small":[169],"amount":[170],"data":[172],"containing":[173],"novel":[174],"object":[175],"attributes.":[176],"Using":[177],"CoGenT":[179],"generalization":[180],"task,":[181],"more":[184],"than":[185],"20":[187],"percentage":[188],"point":[189],"improvement":[190],"over":[191],"current":[193],"state":[194],"art.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":36},{"year":2020,"cited_by_count":33},{"year":2019,"cited_by_count":45},{"year":2018,"cited_by_count":12}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2018-03-29T00:00:00"}
