{"id":"https://openalex.org/W4416036883","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.198","title":"From Reasoning to Answer: Empirical, Attention-Based and Mechanistic Insights into Distilled DeepSeek R1 Models","display_name":"From Reasoning to Answer: Empirical, Attention-Based and Mechanistic Insights into Distilled DeepSeek R1 Models","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036883","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.198"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.198","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.198","pdf_url":"https://aclanthology.org/2025.emnlp-main.198.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.198.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100752550","display_name":"Jue Zhang","orcid":"https://orcid.org/0000-0003-0440-1357"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jue Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088646345","display_name":"Qingwei Lin","orcid":"https://orcid.org/0000-0003-2559-2383"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qingwei Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070722259","display_name":"Saravan Rajmohan","orcid":"https://orcid.org/0000-0002-2019-213X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saravan Rajmohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100331488","display_name":"Dongmei Zhang","orcid":"https://orcid.org/0000-0002-9230-2799"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongmei Zhang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100752550"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18638486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3985","last_page":"4002"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.21699999272823334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.21699999272823334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.03220000118017197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.031700000166893005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distilled-water","display_name":"Distilled water","score":0.2928999960422516},{"id":"https://openalex.org/keywords/mathematical-model","display_name":"Mathematical model","score":0.23309999704360962},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.22310000658035278}],"concepts":[{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.451200008392334},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3711000084877014},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.3165999948978424},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30300000309944153},{"id":"https://openalex.org/C2779429693","wikidata":"https://www.wikidata.org/wiki/Q274959","display_name":"Distilled water","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23999999463558197},{"id":"https://openalex.org/C76969082","wikidata":"https://www.wikidata.org/wiki/Q486902","display_name":"Mathematical model","level":2,"score":0.23309999704360962},{"id":"https://openalex.org/C183696295","wikidata":"https://www.wikidata.org/wiki/Q2487696","display_name":"Biochemical engineering","level":1,"score":0.23100000619888306},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.22519999742507935},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.22310000658035278}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.198","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.198","pdf_url":"https://aclanthology.org/2025.emnlp-main.198.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.198","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.198","pdf_url":"https://aclanthology.org/2025.emnlp-main.198.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036883.pdf","grobid_xml":"https://content.openalex.org/works/W4416036883.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Reasoning":[1],"Models":[2],"(LRMs)":[3],"generate":[4],"explicit":[5,51],"reasoning":[6,34,52,69,80,100,108,125,136,147],"traces":[7,17],"alongside":[8],"final":[9,114],"answers,":[10,115],"yet":[11],"the":[12,31,79,94,113,142],"extent":[13],"to":[14,68,92,106,126],"which":[15],"these":[16],"influence":[18],"answer":[19,36,55,64,97,139],"generation":[20,37],"remains":[21],"unclear.In":[22],"this":[23],"work,":[24],"we":[25,47,85],"conduct":[26],"a":[27,117],"three-stage":[28],"investigation":[29],"into":[30],"interplay":[32],"between":[33],"and":[35,119],"in":[38,148],"three":[39],"distilled":[40],"DeepSeek":[41],"R1":[42],"models.First,":[43],"through":[44],"empirical":[45],"evaluation,":[46],"demonstrate":[48],"that":[49,63,104],"including":[50,82],"consistently":[53],"improves":[54],"quality":[56],"across":[57],"diverse":[58],"domains.Second,":[59],"attention":[60],"analysis":[61],"reveals":[62],"tokens":[65,98,109,137],"attend":[66],"substantially":[67],"tokens,":[70],"with":[71],"certain":[72],"midlayer":[73],"Reasoning-Focus":[74],"Heads":[75],"(RFHs)":[76],"closely":[77],"tracking":[78],"trajectory,":[81],"selfreflective":[83],"cues.Third,":[84],"apply":[86],"mechanistic":[87],"interventions":[88],"using":[89],"activation":[90],"patching":[91],"assess":[93],"dependence":[95],"of":[96,122,132,145],"on":[99],"activations.Our":[101],"results":[102],"show":[103],"perturbations":[105],"key":[107],"can":[110],"reliably":[111],"alter":[112],"confirming":[116],"directional":[118],"functional":[120,143],"flow":[121],"information":[123],"from":[124],"answer.These":[127],"findings":[128],"deepen":[129],"our":[130],"understanding":[131],"how":[133],"LRMs":[134],"leverage":[135],"for":[138],"generation,":[140],"highlighting":[141],"role":[144],"intermediate":[146],"shaping":[149],"model":[150],"outputs.":[151]},"counts_by_year":[],"updated_date":"2026-03-08T06:56:09.383167","created_date":"2025-11-08T00:00:00"}
