{"id":"https://openalex.org/W7106796607","doi":"https://doi.org/10.48550/arxiv.2511.19803","title":"Scalable Data Attribution via Forward-Only Test-Time Inference","display_name":"Scalable Data Attribution via Forward-Only Test-Time Inference","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7106796607","doi":"https://doi.org/10.48550/arxiv.2511.19803"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.19803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.19803","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ma, Sibo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ma, Sibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Nyarko, Julian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nyarko, Julian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.3490999937057495,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.3490999937057495,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.15029999613761902,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.0843999981880188,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6427000164985657},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.6082000136375427},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5737000107765198},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.47040000557899475},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.459199994802475},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.45159998536109924},{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.41179999709129333},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.38679999113082886},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.36719998717308044},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.3467000126838684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7441999912261963},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6427000164985657},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.6082000136375427},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5737000107765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5475000143051147},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.47040000557899475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4697999954223633},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.45159998536109924},{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.387800008058548},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.31709998846054077},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C2385561","wikidata":"https://www.wikidata.org/wiki/Q45432","display_name":"RSS","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C2780909371","wikidata":"https://www.wikidata.org/wiki/Q4801092","display_name":"Artificial noise","level":4,"score":0.2538999915122986},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2515999972820282},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.19803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.19803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"attribution":[1,50,159,187],"seeks":[2],"to":[3,8,96],"trace":[4],"model":[5,104,126],"behavior":[6],"back":[7],"the":[9,54,138],"training":[10,68,77],"examples":[11],"that":[12,52],"shaped":[13],"it,":[14],"enabling":[15],"debugging,":[16],"auditing,":[17],"and":[18,78,162],"data":[19,49,119,186],"valuation":[20],"at":[21,44],"scale.":[22],"Classical":[23],"influence-function":[24,172],"methods":[25],"offer":[26],"a":[27,48,103,114,123,133,180],"principled":[28],"foundation":[29],"but":[30,111],"remain":[31],"impractical":[32],"for":[33,83,183],"modern":[34],"networks":[35],"because":[36],"they":[37],"require":[38],"expensive":[39],"backpropagation":[40],"or":[41,150],"Hessian":[42],"inversion":[43],"inference.":[45],"We":[46],"propose":[47],"method":[51,178],"preserves":[53],"same":[55],"first-order":[56,175],"counterfactual":[57],"target":[58],"while":[59,131,164],"eliminating":[60],"per-query":[61],"backward":[62],"passes.":[63],"Our":[64],"approach":[65],"simulates":[66],"each":[67],"example's":[69],"parameter":[70],"influence":[71],"through":[72],"short-horizon":[73],"gradient":[74],"propagation":[75],"during":[76],"later":[79],"reads":[80],"out":[81],"attributions":[82],"any":[84],"query":[85],"using":[86],"only":[87],"forward":[88],"evaluations.":[89],"This":[90],"design":[91],"shifts":[92],"computation":[93],"from":[94,113],"inference":[95,168],"simulation,":[97],"reflecting":[98],"real":[99],"deployment":[100],"regimes":[101],"where":[102],"may":[105],"serve":[106],"billions":[107],"of":[108,118],"user":[109],"queries":[110],"originate":[112],"fixed,":[115],"finite":[116],"set":[117],"sources":[120],"(for":[121],"example,":[122],"large":[124,189],"language":[125],"trained":[127],"on":[128,143,157],"diverse":[129],"corpora":[130],"compensating":[132],"specific":[134],"publisher":[135],"such":[136,154],"as":[137,155],"New":[139],"York":[140],"Times).":[141],"Empirically,":[142],"standard":[144,158],"MLP":[145],"benchmarks,":[146],"our":[147,177],"estimator":[148],"matches":[149],"surpasses":[151],"state-of-the-art":[152],"baselines":[153],"TRAK":[156],"metrics":[160],"(LOO":[161],"LDS)":[163],"offering":[165],"orders-of-magnitude":[166],"lower":[167],"cost.":[169],"By":[170],"combining":[171],"fidelity":[173],"with":[174],"scalability,":[176],"provides":[179],"theoretical":[181],"framework":[182],"practical,":[184],"real-time":[185],"in":[188],"pretrained":[190],"models.":[191]},"counts_by_year":[],"updated_date":"2025-11-28T02:12:24.556248","created_date":"2025-11-28T00:00:00"}
