{"id":"https://openalex.org/W4416342056","doi":"https://doi.org/10.1109/socc66126.2025.11235358","title":"Efficient Task Graph Generation for DNN Inference on FPGA-based SoC Platforms","display_name":"Efficient Task Graph Generation for DNN Inference on FPGA-based SoC Platforms","publication_year":2025,"publication_date":"2025-09-29","ids":{"openalex":"https://openalex.org/W4416342056","doi":"https://doi.org/10.1109/socc66126.2025.11235358"},"language":null,"primary_location":{"id":"doi:10.1109/socc66126.2025.11235358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc66126.2025.11235358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 38th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068133048","display_name":"Mukta Debnath","orcid":"https://orcid.org/0000-0001-5478-5163"},"institutions":[{"id":"https://openalex.org/I106542073","display_name":"University of Calcutta","ror":"https://ror.org/01e7v7w47","country_code":"IN","type":"education","lineage":["https://openalex.org/I106542073"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Mukta Debnath","raw_affiliation_strings":["University of Calcutta,India"],"affiliations":[{"raw_affiliation_string":"University of Calcutta,India","institution_ids":["https://openalex.org/I106542073"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039894957","display_name":"Shuvadeep Bhattacharjee","orcid":null},"institutions":[{"id":"https://openalex.org/I106542073","display_name":"University of Calcutta","ror":"https://ror.org/01e7v7w47","country_code":"IN","type":"education","lineage":["https://openalex.org/I106542073"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shuvadeep Bhattacharjee","raw_affiliation_strings":["University of Calcutta,India"],"affiliations":[{"raw_affiliation_string":"University of Calcutta,India","institution_ids":["https://openalex.org/I106542073"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028889445","display_name":"Prapti Ganguly","orcid":"https://orcid.org/0009-0006-7861-8785"},"institutions":[{"id":"https://openalex.org/I106542073","display_name":"University of Calcutta","ror":"https://ror.org/01e7v7w47","country_code":"IN","type":"education","lineage":["https://openalex.org/I106542073"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prapti Ganguly","raw_affiliation_strings":["University of Calcutta,India"],"affiliations":[{"raw_affiliation_string":"University of Calcutta,India","institution_ids":["https://openalex.org/I106542073"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101436820","display_name":"Swagata Mandal","orcid":"https://orcid.org/0000-0002-0534-3083"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Swagata Mandal","raw_affiliation_strings":["Jalpaiguri Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Jalpaiguri Government Engineering College,India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043543748","display_name":"Amlan Chakrabarti","orcid":"https://orcid.org/0000-0003-4380-3172"},"institutions":[{"id":"https://openalex.org/I106542073","display_name":"University of Calcutta","ror":"https://ror.org/01e7v7w47","country_code":"IN","type":"education","lineage":["https://openalex.org/I106542073"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amlan Chakrabarti","raw_affiliation_strings":["University of Calcutta,India"],"affiliations":[{"raw_affiliation_string":"University of Calcutta,India","institution_ids":["https://openalex.org/I106542073"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068133048"],"corresponding_institution_ids":["https://openalex.org/I106542073"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3504027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8837000131607056,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8837000131607056,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.02410000003874302,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.008200000040233135,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5874999761581421},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5735999941825867},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4999000132083893},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.47839999198913574},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41370001435279846},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.40860000252723694},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.38089999556541443},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.37389999628067017},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.33899998664855957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8169999718666077},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5874999761581421},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5735999941825867},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4999000132083893},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.47839999198913574},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.42590001225471497},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4156000018119812},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41370001435279846},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.40860000252723694},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3571999967098236},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C127964446","wikidata":"https://www.wikidata.org/wiki/Q1092142","display_name":"Computational resource","level":3,"score":0.33660000562667847},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.32989999651908875},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.3188999891281128},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3156000077724457},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.31529998779296875},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.3125},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C2777575374","wikidata":"https://www.wikidata.org/wiki/Q1644704","display_name":"MicroBlaze","level":3,"score":0.2996000051498413},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2858000099658966},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.27970001101493835},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.271699994802475},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/socc66126.2025.11235358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc66126.2025.11235358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 38th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2396572963","https://openalex.org/W2891946740","https://openalex.org/W2919115771","https://openalex.org/W2936278485","https://openalex.org/W2945146780","https://openalex.org/W2963122961","https://openalex.org/W3204657531","https://openalex.org/W4242577057","https://openalex.org/W4292072255","https://openalex.org/W4362723189","https://openalex.org/W4403186986","https://openalex.org/W4404927165","https://openalex.org/W4409347793"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"are":[4],"increasingly":[5],"deployed":[6],"on":[7,81,90,125],"edge":[8],"AI":[9],"platforms":[10],"built":[11],"with":[12,107],"FPGA-based":[13,108],"system-on-chip":[14],"(SoC)":[15],"architectures.":[16],"However,":[17],"their":[18],"high":[19],"computational":[20],"complexity":[21],"and":[22,48,66,77,97,102,120,146],"memory":[23],"requirements":[24],"pose":[25],"significant":[26],"challenges":[27],"for":[28,74],"efficient":[29],"hardware":[30,143],"deployment.":[31],"In":[32],"this":[33],"work,":[34],"we":[35],"present":[36],"a":[37,121],"lightweight,":[38],"static":[39],"preprocessing":[40,64],"technique":[41],"that":[42,88],"performs":[43],"batch":[44],"normalization":[45],"(BN)":[46],"fusion":[47],"per-channel":[49],"INT8":[50],"quantization":[51],"prior":[52],"to":[53,134],"task":[54,70,100],"tree":[55],"generation.":[56],"This":[57],"transformation":[58],"significantly":[59],"reduces":[60],"model":[61,138],"size,":[62,139],"lowers":[63],"latency":[65],"produces":[67],"simplified,":[68],"hardware-friendly":[69],"graphs\u2014eliminating":[71],"the":[72,126],"need":[73],"runtime":[75],"transformations":[76],"enabling":[78],"direct":[79],"deployment":[80],"custom":[82,122],"FPGA":[83],"accelerators.":[84],"Unlike":[85],"traditional":[86],"methods":[87],"rely":[89],"retraining,":[91],"our":[92,113],"approach":[93],"operates":[94],"entirely":[95],"post-training":[96],"outputs":[98],"lightweight":[99],"trees":[101],"quantized":[103],"weight":[104],"files":[105],"compatible":[106],"SoC":[109],"schedulers.":[110],"We":[111],"validate":[112],"framework":[114],"using":[115],"AlexNet,":[116],"VGG16,":[117],"SqueezeNet,":[118],"LeNet":[119],"BN-heavy":[123],"CNN":[124],"Xilinx":[127],"Zynq":[128],"ZCU104":[129],"platform.":[130],"Results":[131],"show":[132],"up":[133],"70%":[135],"reduction":[136,141],"in":[137,142],"25\u201335%":[140],"resource":[144],"usage":[145],"negligible":[147],"accuracy":[148],"drop":[149],"(<0.2%),":[150],"while":[151],"preserving":[152],"inference":[153],"throughput.":[154]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-17T00:00:00"}
