{"id":"https://openalex.org/W4412594747","doi":"https://doi.org/10.3390/bdcc9080193","title":"Survey on the Role of Mechanistic Interpretability in Generative AI","display_name":"Survey on the Role of Mechanistic Interpretability in Generative AI","publication_year":2025,"publication_date":"2025-07-23","ids":{"openalex":"https://openalex.org/W4412594747","doi":"https://doi.org/10.3390/bdcc9080193"},"language":"en","primary_location":{"id":"doi:10.3390/bdcc9080193","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc9080193","pdf_url":"https://www.mdpi.com/2504-2289/9/8/193/pdf?version=1753265287","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-2289/9/8/193/pdf?version=1753265287","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062023845","display_name":"Leonardo Ranaldi","orcid":"https://orcid.org/0000-0001-8488-4146"},"institutions":[{"id":"https://openalex.org/I116067653","display_name":"University of Rome Tor Vergata","ror":"https://ror.org/02p77k626","country_code":"IT","type":"education","lineage":["https://openalex.org/I116067653"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB","IT"],"is_corresponding":true,"raw_author_name":"Leonardo Ranaldi","raw_affiliation_strings":["Human-Centric ART, University of Rome Tor Vergata, Viale del Politecnico, 1, 00133 Rome, Italy","School of Informatics, University of Edinburgh, Edinburgh EH8 9AB, UK"],"affiliations":[{"raw_affiliation_string":"Human-Centric ART, University of Rome Tor Vergata, Viale del Politecnico, 1, 00133 Rome, Italy","institution_ids":["https://openalex.org/I116067653"]},{"raw_affiliation_string":"School of Informatics, University of Edinburgh, Edinburgh EH8 9AB, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5062023845"],"corresponding_institution_ids":["https://openalex.org/I116067653","https://openalex.org/I98677209"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":11.7842,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98230935,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"9","issue":"8","first_page":"193","last_page":"193"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14351","display_name":"Statistical and Computational Modeling","score":0.9677000045776367,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9635000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9413144588470459},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6908027529716492},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4058421552181244},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3586094379425049},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.35205602645874023}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9413144588470459},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6908027529716492},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4058421552181244},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3586094379425049},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35205602645874023}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/bdcc9080193","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc9080193","pdf_url":"https://www.mdpi.com/2504-2289/9/8/193/pdf?version=1753265287","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},{"id":"pmh:oai:art.torvergata.it:2108/429543","is_oa":false,"landing_page_url":"https://hdl.handle.net/2108/429543","pdf_url":null,"source":{"id":"https://openalex.org/S4306400993","display_name":"Cineca Institutional Research Information System (Tor Vergata University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I116067653","host_organization_name":"University of Rome Tor Vergata","host_organization_lineage":["https://openalex.org/I116067653"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:art.torvergata.it:2108/453223","is_oa":false,"landing_page_url":"https://hdl.handle.net/2108/453223","pdf_url":null,"source":{"id":"https://openalex.org/S4306400993","display_name":"Cineca Institutional Research Information System (Tor Vergata University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I116067653","host_organization_name":"University of Rome Tor Vergata","host_organization_lineage":["https://openalex.org/I116067653"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:doaj.org/article:4dce2118399e497c960b71c6708508d5","is_oa":true,"landing_page_url":"https://doaj.org/article/4dce2118399e497c960b71c6708508d5","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data and Cognitive Computing, Vol 9, Iss 8, p 193 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/bdcc9080193","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc9080193","pdf_url":"https://www.mdpi.com/2504-2289/9/8/193/pdf?version=1753265287","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412594747.pdf","grobid_xml":"https://content.openalex.org/works/W4412594747.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W2516809705","https://openalex.org/W2946417913","https://openalex.org/W2962862931","https://openalex.org/W3010694149","https://openalex.org/W3037116584","https://openalex.org/W3090395639","https://openalex.org/W3112541430","https://openalex.org/W3122731516","https://openalex.org/W4206410067","https://openalex.org/W4221166192","https://openalex.org/W4226024653","https://openalex.org/W4226059755","https://openalex.org/W4241940531","https://openalex.org/W4281390462","https://openalex.org/W4281657280","https://openalex.org/W4306176181","https://openalex.org/W4313585449","https://openalex.org/W4320170046","https://openalex.org/W4323557327","https://openalex.org/W4379928343","https://openalex.org/W4385571966","https://openalex.org/W4387929183","https://openalex.org/W4388502440","https://openalex.org/W4389518382","https://openalex.org/W4389519056","https://openalex.org/W4389519449","https://openalex.org/W4394743141","https://openalex.org/W4398138684","https://openalex.org/W4398183782","https://openalex.org/W4401974878","https://openalex.org/W4402670439","https://openalex.org/W4402670703","https://openalex.org/W4402671016","https://openalex.org/W4402671045","https://openalex.org/W4403070034","https://openalex.org/W4403172155","https://openalex.org/W4403577370","https://openalex.org/W4404782389","https://openalex.org/W4404783649","https://openalex.org/W4405974330","https://openalex.org/W4406026208","https://openalex.org/W4406436575","https://openalex.org/W4407880928","https://openalex.org/W6737947904","https://openalex.org/W6838863277","https://openalex.org/W6839328737","https://openalex.org/W6843138171","https://openalex.org/W6843370592","https://openalex.org/W6853260906"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2905433371","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W4390569940","https://openalex.org/W4361193272","https://openalex.org/W2963326959"],"abstract_inverted_index":{"The":[0],"rapid":[1],"advancement":[2],"of":[3,51,56,81,126,177,186],"artificial":[4],"intelligence":[5],"(AI)":[6],"and":[7,18,29,41,64,78,88,103,118,136,145,167,205,219,231,251,269,286,295,299],"machine":[8],"learning":[9,57],"has":[10],"revolutionised":[11],"how":[12,226],"systems":[13,70],"process":[14,87],"information,":[15],"make":[16],"decisions,":[17],"adapt":[19],"to":[20,38,48,86,194,201,224],"dynamic":[21],"environments.":[22],"AI-driven":[23,261],"approaches":[24,179],"have":[25,46],"significantly":[26],"enhanced":[27],"efficiency":[28,296],"problem-solving":[30],"capabilities":[31,120],"across":[32,66],"various":[33],"domains,":[34],"from":[35],"automated":[36,164],"decision-making":[37,165,271],"knowledge":[39,204,227],"representation":[40,208,220],"predictive":[42],"modelling.":[43],"These":[44],"developments":[45],"led":[47],"the":[49,79,111,149,183,190,280],"emergence":[50],"increasingly":[52],"sophisticated":[53],"models":[54,154,285],"capable":[55],"patterns,":[58],"reasoning":[59,117],"over":[60],"complex":[61],"data":[62,90],"structures,":[63],"generalising":[65],"tasks.":[67],"As":[68],"AI":[69,153,234,284,289,301],"become":[71],"more":[72],"deeply":[73],"integrated":[74],"into":[75,160],"networked":[76],"infrastructures":[77],"Internet":[80],"Things":[82],"(IoT),":[83],"their":[84,143,158,195],"ability":[85],"interpret":[89],"in":[91,233,259,297],"real-time":[92,270],"is":[93,155,228,276],"essential":[94],"for":[95,157,278],"optimising":[96],"intelligent":[97,161],"communication":[98],"networks,":[99],"distributed":[100],"decision":[101],"making,":[102],"autonomous":[104],"IoT":[105],"systems.":[106,235,302],"However,":[107],"despite":[108],"these":[109,274],"achievements,":[110],"internal":[112],"mechanisms":[113,185],"that":[114],"drive":[115],"LLMs\u2019":[116],"generalisation":[119,196],"remain":[121],"largely":[122],"unexplored.":[123],"This":[124,171],"lack":[125],"transparency,":[127],"compounded":[128],"by":[129,237],"challenges":[130],"such":[131],"as":[132,222],"hallucinations,":[133],"adversarial":[134],"perturbations,":[135],"misaligned":[137],"human":[138],"expectations,":[139],"raises":[140],"concerns":[141],"about":[142],"safe":[144],"beneficial":[146],"deployment.":[147],"Understanding":[148,273],"underlying":[150],"principles":[151,275],"governing":[152],"crucial":[156,257,277],"integration":[159],"network":[162,265],"systems,":[163,262],"processes,":[166],"secure":[168],"digital":[169],"infrastructures.":[170],"paper":[172],"provides":[173],"a":[174,239,256],"comprehensive":[175],"analysis":[176],"explainability":[178],"aimed":[180],"at":[181],"uncovering":[182],"fundamental":[184],"LLMs.":[187],"We":[188],"investigate":[189],"strategic":[191],"components":[192],"contributing":[193],"abilities,":[197],"focusing":[198],"on":[199],"methods":[200],"quantify":[202],"acquired":[203],"assess":[206],"its":[207],"within":[209,246],"model":[210],"parameters.":[211],"Specifically,":[212],"we":[213,242],"examine":[214],"mechanistic":[215,240],"interpretability,":[216],"probing":[217],"techniques,":[218],"engineering":[221],"tools":[223],"decipher":[225],"structured,":[229],"encoded,":[230],"retrieved":[232],"Furthermore,":[236],"adopting":[238],"perspective,":[241],"analyse":[243],"emergent":[244],"phenomena":[245],"training":[247],"dynamics,":[248],"particularly":[249],"memorisation":[250],"generalisation,":[252],"which":[253],"also":[254],"play":[255],"role":[258],"broader":[260],"including":[263],"adaptive":[264],"intelligence,":[266],"edge":[267],"computing,":[268],"architectures.":[272],"bridging":[279],"gap":[281],"between":[282],"black-box":[283],"practical,":[287],"explainable":[288],"applications,":[290],"thereby":[291],"ensuring":[292],"trust,":[293],"robustness,":[294],"language-based":[298],"general":[300]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
