{"id":"https://openalex.org/W4391463028","doi":"https://doi.org/10.48550/arxiv.2401.17904","title":"Hi-SAM: Marrying Segment Anything Model for Hierarchical Text Segmentation","display_name":"Hi-SAM: Marrying Segment Anything Model for Hierarchical Text Segmentation","publication_year":2024,"publication_date":"2024-01-31","ids":{"openalex":"https://openalex.org/W4391463028","doi":"https://doi.org/10.48550/arxiv.2401.17904"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2401.17904","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17904","pdf_url":"https://arxiv.org/pdf/2401.17904","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2401.17904","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070541608","display_name":"Maoyuan Ye","orcid":"https://orcid.org/0000-0002-4180-1096"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ye, Maoyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066905801","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0002-5230-6285"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026501335","display_name":"Juhua Liu","orcid":"https://orcid.org/0000-0002-3907-8820"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Juhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731335","display_name":"Chenyu Liu","orcid":"https://orcid.org/0000-0002-3654-1824"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059859891","display_name":"Baocai Yin","orcid":"https://orcid.org/0000-0002-4164-6647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Baocai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100331624","display_name":"Cong Liu","orcid":"https://orcid.org/0000-0002-9796-1371"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Cong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100745020","display_name":"Boxue Du","orcid":"https://orcid.org/0000-0002-7500-1134"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074103823","display_name":"Dacheng Tao","orcid":"https://orcid.org/0000-0001-7225-5449"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Dacheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5070541608"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.8436999917030334,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.8436999917030334,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.8162999749183655,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.8072999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7045400142669678},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5060338377952576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41153600811958313},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3682355284690857}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7045400142669678},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5060338377952576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41153600811958313},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3682355284690857}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2401.17904","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17904","pdf_url":"https://arxiv.org/pdf/2401.17904","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2401.17904","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2401.17904","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2401.17904","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.17904","pdf_url":"https://arxiv.org/pdf/2401.17904","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1836220556","display_name":null,"funder_award_id":"62225113","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3648843532","display_name":null,"funder_award_id":"U23B2048","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5586136776","display_name":null,"funder_award_id":"2022YFB4500600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7138842952","display_name":null,"funder_award_id":"2023YFC2705700","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8400922664","display_name":null,"funder_award_id":"2023YFC2705700","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8527334069","display_name":null,"funder_award_id":"62076186","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320315254","display_name":"Innovative Research Group Project of the National Natural Science Foundation of China","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324116","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391463028.pdf","grobid_xml":"https://content.openalex.org/works/W4391463028.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,251],"Segment":[1],"Anything":[2],"Model":[3],"(SAM),":[4],"a":[5,12,30,65,73,90,121,180],"profound":[6],"vision":[7],"foundation":[8],"model":[9,32,71,81],"pretrained":[10],"on":[11,116,196,201,220,231,240],"large-scale":[13],"dataset,":[14],"breaks":[15],"the":[16,85,96,101,111,117,142,169,187,210,232,241],"boundaries":[17],"of":[18,190],"general":[19],"segmentation":[20,42,69,138],"and":[21,51,136,161,176,198,217,228,237],"sparks":[22],"various":[23],"downstream":[24],"applications.":[25],"This":[26],"paper":[27],"introduces":[28],"Hi-SAM,":[29],"unified":[31],"leveraging":[33],"SAM":[34,63],"for":[35,156,168,203,213],"hierarchical":[36,123,157,215],"text":[37,68,87,98,148,158,205],"segmentation.":[38,206],"Hi-SAM":[39,114,128,145,172,222],"excels":[40],"in":[41,89,100,165],"across":[43,95],"four":[44,97],"hierarchies,":[45],"including":[46],"pixel-level":[47,67,86,147,204],"text,":[48],"word,":[49,174],"text-line,":[50,175],"paragraph,":[52],"while":[53],"realizing":[54],"layout":[55,163,218,244],"analysis":[56,164,219],"as":[57],"well.":[58],"Specifically,":[59],"we":[60,109],"first":[61],"turn":[62],"into":[64],"high-quality":[66],"(TS)":[70],"through":[72],"parameter-efficient":[74],"fine-tuning":[75],"approach.":[76],"We":[77],"use":[78],"this":[79],"TS":[80,118,192],"to":[82,209],"iteratively":[83],"generate":[84],"labels":[88,94],"semi-automatical":[91],"manner,":[92],"unifying":[93],"hierarchies":[99],"HierText":[102],"dataset.":[103],"Subsequently,":[104],"with":[105,120,179],"these":[106],"complete":[107],"labels,":[108],"launch":[110],"end-to-end":[112],"trainable":[113],"based":[115],"architecture":[119],"customized":[122],"mask":[124,132,159],"decoder.":[125],"During":[126],"inference,":[127],"offers":[129],"both":[130],"automatic":[131],"generation":[133,160],"(AMG)":[134],"mode":[135],"promptable":[137],"(PS)":[139],"mode.":[140],"In":[141],"AMG":[143],"mode,":[144,171],"segments":[146],"foreground":[149,154],"masks":[150,178],"initially,":[151],"then":[152],"samples":[153],"points":[155],"achieves":[162,223],"passing.":[166],"As":[167],"PS":[170],"provides":[173],"paragraph":[177,242],"single":[181],"point":[182],"click.":[183],"Experimental":[184],"results":[185],"show":[186],"state-of-the-art":[188],"performance":[189],"our":[191],"model:":[193],"84.86%":[194],"fgIOU":[195,200],"Total-Text":[197],"88.96%":[199],"TextSeg":[202],"Moreover,":[207],"compared":[208],"previous":[211],"specialist":[212],"joint":[214],"detection":[216],"HierText,":[221],"significant":[224],"improvements:":[225],"4.73%":[226],"PQ":[227,236],"5.39%":[229],"F1":[230,239],"text-line":[233],"level,":[234],"5.49%":[235],"7.39%":[238],"level":[243],"analysis,":[245],"requiring":[246],"$20\\times$":[247],"fewer":[248],"training":[249],"epochs.":[250],"code":[252],"is":[253],"available":[254],"at":[255],"https://github.com/ymy-k/Hi-SAM.":[256]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2024-02-02T00:00:00"}
