{"id":"https://openalex.org/W7155516086","doi":"https://doi.org/10.1145/3799830.3799842","title":"SPARK: Scene Prediction Augmented with Relational-Commonsense Knowledge","display_name":"SPARK: Scene Prediction Augmented with Relational-Commonsense Knowledge","publication_year":2025,"publication_date":"2025-12-17","ids":{"openalex":"https://openalex.org/W7155516086","doi":"https://doi.org/10.1145/3799830.3799842"},"language":null,"primary_location":{"id":"doi:10.1145/3799830.3799842","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799842","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3799830.3799842","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120637443","display_name":"Chaitanya Garg","orcid":"https://orcid.org/0009-0004-2340-6561"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Chaitanya Garg","raw_affiliation_strings":["Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India"],"raw_orcid":"https://orcid.org/0009-0004-2340-6561","affiliations":[{"raw_affiliation_string":"Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India","institution_ids":["https://openalex.org/I119939252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134485941","display_name":"Tanishq Jain","orcid":"https://orcid.org/0009-0005-0177-3448"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Tanishq Jain","raw_affiliation_strings":["Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India"],"raw_orcid":"https://orcid.org/0009-0005-0177-3448","affiliations":[{"raw_affiliation_string":"Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India","institution_ids":["https://openalex.org/I119939252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118996647","display_name":"Sathyanarayanan Aakur","orcid":null},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sathyanarayanan Aakur","raw_affiliation_strings":["Computer Science Department, Auburn University, Auburn, Alabama, USA"],"raw_orcid":"https://orcid.org/0000-0003-1062-8929","affiliations":[{"raw_affiliation_string":"Computer Science Department, Auburn University, Auburn, Alabama, USA","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027667267","display_name":"Raghava Mutharaju","orcid":"https://orcid.org/0000-0003-2421-3935"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Raghava Mutharaju","raw_affiliation_strings":["Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India"],"raw_orcid":"https://orcid.org/0000-0003-2421-3935","affiliations":[{"raw_affiliation_string":"Knowledgeable Computing and Reasoning Lab, Indraprastha Institute of Information Technology Delhi, New Delhi, Delhi, India","institution_ids":["https://openalex.org/I119939252"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120637443"],"corresponding_institution_ids":["https://openalex.org/I119939252"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.73886142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"106","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.004699999932199717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.6952000260353088},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5281000137329102},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.5253999829292297},{"id":"https://openalex.org/keywords/commonsense-knowledge","display_name":"Commonsense knowledge","score":0.4666999876499176},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.427700012922287},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.38440001010894775},{"id":"https://openalex.org/keywords/commonsense-reasoning","display_name":"Commonsense reasoning","score":0.3736000061035156},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.3395000100135803}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7476999759674072},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.6952000260353088},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5838000178337097},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5281000137329102},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.5253999829292297},{"id":"https://openalex.org/C30542707","wikidata":"https://www.wikidata.org/wiki/Q1603203","display_name":"Commonsense knowledge","level":3,"score":0.4666999876499176},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3569999933242798},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C140146324","wikidata":"https://www.wikidata.org/wiki/Q1144319","display_name":"Predicate (mathematical logic)","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28060001134872437},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.27250000834465027},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.25839999318122864},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3799830.3799842","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799842","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3799830.3799842","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799842","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.5452675223350525}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W2064675550","https://openalex.org/W2081580037","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2116341502","https://openalex.org/W2277195237","https://openalex.org/W2479423890","https://openalex.org/W2561529111","https://openalex.org/W2950096400","https://openalex.org/W2962737704","https://openalex.org/W2962785943","https://openalex.org/W2963536419","https://openalex.org/W2963650529","https://openalex.org/W2964199361","https://openalex.org/W3108864070","https://openalex.org/W3165066581","https://openalex.org/W3202514640","https://openalex.org/W3207531676","https://openalex.org/W4285208802","https://openalex.org/W4312689172","https://openalex.org/W4366352717","https://openalex.org/W4389782456"],"related_works":[],"abstract_inverted_index":{"Scene":[0],"Graph":[1,108],"Generation":[2],"(SGG)":[3],"has":[4],"emerged":[5],"as":[6],"a":[7,43,67,74,85,133,182],"critical":[8],"task":[9],"for":[10],"visual":[11,25,97,192],"understanding,":[12],"yet":[13],"conventional":[14],"approaches":[15],"struggle":[16],"with":[17,40,55,96],"long-tailed":[18,171],"data":[19],"distributions,":[20,127],"limited":[21],"expressiveness":[22],"in":[23,185],"capturing":[24],"relationships,":[26],"and":[27,73,159,174],"poor":[28],"generalization":[29],"to":[30,94,112,137,168,176],"novel":[31,86,177],"relationships.":[32,98,178],"This":[33,163],"paper":[34],"introduces":[35],"SPARK":[36,149,180],"(Scene":[37],"Prediction":[38],"Augmented":[39],"Relational-Commonsense":[41],"Knowledge),":[42],"neuro-symbolic":[44],"framework":[45],"that":[46,78,90,148],"enhances":[47],"scene":[48,119],"graph":[49,104],"generation":[50],"by":[51],"integrating":[52,186],"commonsense":[53,71,188],"knowledge":[54,76,92,100,189],"existing":[56,191],"vision-based":[57],"SGG":[58],"models.":[59],"Our":[60,99],"approach":[61,89],"consists":[62],"of":[63,70],"two":[64],"key":[65],"components:":[66],"comprehensive":[68],"remodeling":[69],"knowledge,":[72],"transformer-based":[75],"module":[77,101],"augments":[79],"the":[80,118,143,151,170],"vision":[81,153],"module.":[82],"We":[83],"employ":[84],"two-step":[87],"LLM-based":[88],"remodels":[91],"triples":[93],"align":[95],"then":[102],"leverages":[103],"embeddings":[105],"refined":[106],"through":[107,132],"Neural":[109],"Networks":[110],"(GNNs)":[111],"capture":[113],"rich":[114],"semantic":[115],"information":[116],"about":[117],"entities.":[120],"Both":[121],"modules":[122],"independently":[123],"generate":[124],"predicate":[125],"probability":[126],"which":[128],"are":[129],"simultaneously":[130],"processed":[131],"multi-layer":[134],"perceptron":[135],"(MLP)":[136],"produce":[138],"final":[139],"predictions.":[140],"Evaluation":[141],"on":[142],"Visual":[144],"Genome":[145],"dataset":[146],"demonstrates":[147],"outperforms":[150],"base":[152],"model":[154],"across":[155],"several":[156],"Mean":[157],"Recall":[158,161],"Zero-Shot":[160],"metrics.":[162],"indicates":[164],"an":[165],"enhanced":[166],"ability":[167],"handle":[169],"distribution":[172],"problem":[173],"generalize":[175],"Hence,":[179],"represents":[181],"significant":[183],"advancement":[184],"structured":[187],"into":[190],"understanding":[193],"systems.":[194],"SPARK\u2019s":[195],"codebase":[196],"is":[197],"available":[198],"at":[199],"https://github.com/kracr/commonsense-scene-graph.":[200]},"counts_by_year":[],"updated_date":"2026-04-25T06:06:54.107920","created_date":"2026-04-25T00:00:00"}
