{"id":"https://openalex.org/W7134041492","doi":"https://doi.org/10.1007/s11263-026-02736-z","title":"HiPrompt: Tuning-free Higher-Resolution Generation with Hierarchical MLLM Prompts","display_name":"HiPrompt: Tuning-free Higher-Resolution Generation with Hierarchical MLLM Prompts","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134041492","doi":"https://doi.org/10.1007/s11263-026-02736-z"},"language":"en","primary_location":{"id":"doi:10.1007/s11263-026-02736-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-026-02736-z","pdf_url":null,"source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s11263-026-02736-z","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128275309","display_name":"Xinyu Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xinyu Liu","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101311554","display_name":"Yingqing He","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yingqing He","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101553481","display_name":"Lanqing Guo","orcid":"https://orcid.org/0000-0002-9452-4723"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lanqing Guo","raw_affiliation_strings":["The University of Texas at Austin, Austin, Texas, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, Austin, Texas, United States of America","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128223788","display_name":"Xiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Tsinghua University, BeiJing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University, BeiJing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102210423","display_name":"Bu Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Bu Jin","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128274105","display_name":"Yan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yan Li","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044330945","display_name":"Chi-Min Chan","orcid":"https://orcid.org/0009-0006-0218-3412"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chi-Min Chan","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128218992","display_name":"Wei Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wei Xue","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128238433","display_name":"Wenhan Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Wenhan Luo","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":"https://orcid.org/0000-0002-5697-4168","affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101816327","display_name":"Qifeng Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qifeng Liu","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013171902","display_name":"Yike Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yike Guo","raw_affiliation_strings":["Hong Kong University of Science and Technology, HongKong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, HongKong, China","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5128238433"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":{"value":2890,"currency":"EUR","value_usd":3690},"apc_paid":{"value":2890,"currency":"EUR","value_usd":3690},"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.33927582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"134","issue":"4","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8792999982833862,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8792999982833862,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.04170000180602074,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.02710000053048134,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.685699999332428},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5985999703407288},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.506600022315979},{"id":"https://openalex.org/keywords/repetition","display_name":"Repetition (rhetorical device)","score":0.4731000065803528},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45719999074935913},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4519999921321869},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.40939998626708984},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.40700000524520874}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8180999755859375},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.685699999332428},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5985999703407288},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5950999855995178},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.506600022315979},{"id":"https://openalex.org/C2776141515","wikidata":"https://www.wikidata.org/wiki/Q1274479","display_name":"Repetition (rhetorical device)","level":2,"score":0.4731000065803528},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45719999074935913},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4519999921321869},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.40939998626708984},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.40700000524520874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38449999690055847},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3061999976634979},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C2780217385","wikidata":"https://www.wikidata.org/wiki/Q2389284","display_name":"Hierarchical organization","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11263-026-02736-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-026-02736-z","pdf_url":null,"source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11263-026-02736-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-026-02736-z","pdf_url":null,"source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323537","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W3191805365","https://openalex.org/W3203631022","https://openalex.org/W3216352822","https://openalex.org/W4312388283","https://openalex.org/W4312694728","https://openalex.org/W4312933868","https://openalex.org/W4390872006","https://openalex.org/W4390872297","https://openalex.org/W4390873054","https://openalex.org/W4393148714","https://openalex.org/W4393153503","https://openalex.org/W4394625750","https://openalex.org/W4402667895","https://openalex.org/W4402702908","https://openalex.org/W4402715897","https://openalex.org/W4402727250","https://openalex.org/W4402727647","https://openalex.org/W4402727764","https://openalex.org/W4402727903","https://openalex.org/W4402754309","https://openalex.org/W4403003238","https://openalex.org/W4403705967","https://openalex.org/W4403716358","https://openalex.org/W4403760842","https://openalex.org/W4403841908","https://openalex.org/W4403888637","https://openalex.org/W4404575065","https://openalex.org/W4404625217","https://openalex.org/W4404690153","https://openalex.org/W4404770126","https://openalex.org/W4409262753","https://openalex.org/W4409366419","https://openalex.org/W4409368325","https://openalex.org/W4411055345","https://openalex.org/W7133185348","https://openalex.org/W7133226442","https://openalex.org/W7133250524"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"The":[1,70,181],"potential":[2],"for":[3,42],"higher-resolution":[4,170],"image":[5,171],"generation":[6,44,136],"using":[7],"pretrained":[8],"diffusion":[9],"models":[10,15],"is":[11,112],"immense.":[12],"However,":[13],"these":[14],"often":[16],"struggle":[17],"with":[18,158],"object":[19,175],"repetition":[20,176],"and":[21,30,76,105,116,144,152,156,177,183],"structural":[22,179],"artifacts":[23],"especially":[24],"when":[25],"scaling":[26],"to":[27,101,137],"4K":[28],"resolution":[29],"beyond.":[31],"Our":[32],"analysis":[33],"reveals":[34],"that":[35,61,164],"causes":[36],"the":[37,43,63,81,135,146,189],"problem,":[38],"a":[39,57],"single":[40],"prompt":[41,83,123],"of":[45],"multiple":[46],"scales":[47],"provides":[48],"insufficient":[49],"efficacy.":[50],"To":[51],"address":[52],"this,":[53],"we":[54],"propose":[55],"HiPrompt,":[56],"new":[58],"tuning-free":[59],"solution":[60],"tackles":[62],"above":[64],"problems":[65],"by":[66,99],"introducing":[67],"hierarchical":[68,71,129],"prompts.":[69],"prompts":[72],"provide":[73],"both":[74],"global":[75,82,153],"local":[77,92,141,151],"semantic":[78,130],"guidance.":[79,131],"Specifically,":[80],"captures":[84],"overall":[85],"scene":[86],"semantics":[87],"from":[88,95],"user":[89],"input,":[90],"while":[91],"guidance":[93],"comes":[94],"patch-wise":[96],"descriptions":[97],"generated":[98,147],"MLLMs":[100],"refine":[102],"regional":[103],"structures":[104],"textures.":[106],"Furthermore,":[107],"during":[108],"inverse":[109],"denoising,":[110],"noise":[111],"decomposed":[113],"into":[114],"low-":[115],"high-frequency":[117],"components,":[118],"each":[119],"conditioned":[120],"on":[121,140,188],"different":[122],"levels,":[124],"facilitating":[125],"prompt-guided":[126],"denoising":[127],"under":[128],"It":[132],"further":[133],"allows":[134],"focus":[138],"more":[139],"spatial":[142],"regions":[143],"ensures":[145],"images":[148],"maintain":[149],"coherent":[150],"semantics,":[154],"structures,":[155],"textures":[157],"high":[159],"definition.":[160],"Extensive":[161],"experiments":[162],"demonstrate":[163],"HiPrompt":[165],"outperforms":[166],"state-of-the-art":[167],"works":[168],"in":[169],"generation,":[172],"significantly":[173],"reducing":[174],"enhancing":[178],"quality.":[180],"demo":[182],"code":[184],"can":[185],"be":[186],"found":[187],"project":[190],"website:":[191],"https://liuxinyv.github.io/HiPrompt/":[192],".":[193]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-07T00:00:00"}
