{"id":"https://openalex.org/W3203872640","doi":"https://doi.org/10.1145/3453986","title":"Thallo \u2013 Scheduling for High-Performance Large-Scale Non-Linear Least-Squares Solvers","display_name":"Thallo \u2013 Scheduling for High-Performance Large-Scale Non-Linear Least-Squares Solvers","publication_year":2021,"publication_date":"2021-09-24","ids":{"openalex":"https://openalex.org/W3203872640","doi":"https://doi.org/10.1145/3453986","mag":"3203872640"},"language":"en","primary_location":{"id":"doi:10.1145/3453986","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3453986","pdf_url":null,"source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006123431","display_name":"Michael W. Mara","orcid":"https://orcid.org/0000-0003-3766-9368"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael Mara","raw_affiliation_strings":["Stanford University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059313827","display_name":"Felix Heide","orcid":"https://orcid.org/0000-0002-8054-9823"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Felix Heide","raw_affiliation_strings":["Princeton University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005789795","display_name":"Michael Zollh\u00f6fer","orcid":"https://orcid.org/0000-0003-1219-0625"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Zollh\u00f6fer","raw_affiliation_strings":["Stanford University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088583491","display_name":"Matthias Nie\u00dfner","orcid":"https://orcid.org/0000-0001-6093-5199"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Matthias Nie\u00dfner","raw_affiliation_strings":["Technical University of Munich"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018111215","display_name":"Pat Hanrahan","orcid":"https://orcid.org/0000-0002-3474-9752"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pat Hanrahan","raw_affiliation_strings":["Stanford University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006123431"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.3879,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.61554229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"40","issue":"5","first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8569458723068237},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5285124778747559},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.48354220390319824},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.46666353940963745},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.4321432113647461},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4212239682674408},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4185873568058014},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4073670208454132},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33415815234184265},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3219478130340576},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1622055470943451},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14045178890228271}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8569458723068237},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5285124778747559},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48354220390319824},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.46666353940963745},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4321432113647461},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4212239682674408},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4185873568058014},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4073670208454132},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33415815234184265},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3219478130340576},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1622055470943451},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14045178890228271},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3453986","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3453986","pdf_url":null,"source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8700000047683716,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W566443111","https://openalex.org/W1484371059","https://openalex.org/W1489951562","https://openalex.org/W1578285471","https://openalex.org/W1967554269","https://openalex.org/W1979449660","https://openalex.org/W1980208272","https://openalex.org/W1987648924","https://openalex.org/W1987957271","https://openalex.org/W2000214666","https://openalex.org/W2002555321","https://openalex.org/W2004068758","https://openalex.org/W2020429267","https://openalex.org/W2021347102","https://openalex.org/W2047947369","https://openalex.org/W2055312318","https://openalex.org/W2070604790","https://openalex.org/W2071906076","https://openalex.org/W2075402943","https://openalex.org/W2089649030","https://openalex.org/W2099940712","https://openalex.org/W2112667144","https://openalex.org/W2113642685","https://openalex.org/W2124313187","https://openalex.org/W2138569625","https://openalex.org/W2151636374","https://openalex.org/W2163446794","https://openalex.org/W2301937176","https://openalex.org/W2325034626","https://openalex.org/W2336961836","https://openalex.org/W2338287119","https://openalex.org/W2466242877","https://openalex.org/W2468336759","https://openalex.org/W2471164860","https://openalex.org/W2471962767","https://openalex.org/W2590246587","https://openalex.org/W2768683308","https://openalex.org/W2807945634","https://openalex.org/W2810610794","https://openalex.org/W2899771611","https://openalex.org/W2906180966","https://openalex.org/W2913535645","https://openalex.org/W2938476095","https://openalex.org/W2961619211","https://openalex.org/W2962813113","https://openalex.org/W2963732450","https://openalex.org/W2968998169","https://openalex.org/W2977371611","https://openalex.org/W2978940263","https://openalex.org/W2997095758","https://openalex.org/W2997701623","https://openalex.org/W3179762292","https://openalex.org/W4246818198","https://openalex.org/W4251637954","https://openalex.org/W4252201060","https://openalex.org/W6758920291"],"related_works":["https://openalex.org/W2384867379","https://openalex.org/W2329539859","https://openalex.org/W3191490922","https://openalex.org/W2227905990","https://openalex.org/W2765823764","https://openalex.org/W3214280620","https://openalex.org/W2794038527","https://openalex.org/W2151092287","https://openalex.org/W2523801036","https://openalex.org/W2505959877"],"abstract_inverted_index":{"Large-scale":[0],"optimization":[1,84,215],"problems":[2,178],"at":[3],"the":[4,41,50,64,98,151,209],"core":[5],"of":[6,63,94,105,141,173,182,220],"many":[7],"graphics,":[8],"vision,":[9],"and":[10,20,38,100,145,176,184,197,229],"imaging":[11],"applications":[12,189,224],"are":[13],"often":[14],"implemented":[15],"by":[16,92,217,236],"hand":[17],"in":[18,23,36,97,125,203,226],"tedious":[19],"error-prone":[21],"processes":[22],"order":[24],"to":[25],"achieve":[26],"high":[27,53],"performance":[28,54],"(in":[29],"particular":[30],"on":[31],"GPUs),":[32],"despite":[33],"recent":[34],"developments":[35],"libraries":[37],"DSLs.":[39],"At":[40],"same":[42],"time,":[43],"these":[44,110],"hand-crafted":[45],"solver":[46],"implementations":[47],"reveal":[48],"that":[49,59,108],"key":[51],"for":[52,79],"is":[55],"a":[56,76,103,115,126,137,146,163,170],"problem-specific":[57],"schedule":[58],"enables":[60],"efficient":[61],"usage":[62],"underlying":[65],"hardware.":[66],"In":[67],"this":[68,72,227],"work,":[69,228],"we":[70,211],"incorporate":[71],"insight":[73],"into":[74,153],"Thallo,":[75],"domain-specific":[77],"language":[78,128],"large-scale":[80,174],"non-linear":[81,175],"least":[82],"squares":[83],"problems.":[85],"We":[86],"observe":[87],"various":[88,180],"code":[89,123],"reorganizations":[90],"performed":[91],"implementers":[93],"high-performance":[95,154],"solvers":[96,161,235],"literature,":[99],"then":[101],"define":[102],"set":[104,172],"basic":[106],"operations":[107],"span":[109],"scheduling":[111,117,127,165],"choices,":[112],"thereby":[113],"defining":[114],"large":[116,164,171],"space.":[118],"Users":[119],"can":[120,159,168],"either":[121],"specify":[122],"transformations":[124],"or":[129],"use":[130],"an":[131,142,218],"autoscheduler.":[132],"Thallo":[133,158],"takes":[134],"as":[135,191,201],"input":[136],"compact,":[138],"shader-like":[139],"representation":[140],"energy":[143],"function":[144],"(potentially":[147],"auto-generated)":[148],"schedule,":[149],"translating":[150],"combination":[152],"GPU":[155,234],"solvers.":[156],"Since":[157],"generate":[160],"from":[162,208],"space,":[166],"it":[167],"handle":[169],"non-smooth":[177],"with":[179],"degrees":[181],"non-locality":[183],"compute-to-memory":[185],"ratios,":[186],"including":[187],"diverse":[188],"such":[190],"bundle":[192],"adjustment,":[193],"face":[194],"blendshape":[195],"fitting,":[196],"spatially-varying":[198],"Poisson":[199],"deconvolution,":[200],"seen":[202],"Figure":[204],"1.":[205],"Abstracting":[206],"schedules":[207],"optimization,":[210],"outperform":[212],"state-of-the-art":[213],"GPU-based":[214],"DSLs":[216],"average":[219],"16\u00d7":[221],"across":[222],"all":[223],"introduced":[225],"even":[230],"some":[231],"published":[232],"hand-written":[233],"30%+.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
