{"id":"https://openalex.org/W4417329376","doi":"https://doi.org/10.3390/info16121106","title":"Adaptive Token Boundaries: Towards Integrating Human Chunking Mechanisms into Multimodal LLMs","display_name":"Adaptive Token Boundaries: Towards Integrating Human Chunking Mechanisms into Multimodal LLMs","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W4417329376","doi":"https://doi.org/10.3390/info16121106"},"language":"en","primary_location":{"id":"doi:10.3390/info16121106","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16121106","pdf_url":"https://www.mdpi.com/2078-2489/16/12/1106/pdf?version=1765809552","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/16/12/1106/pdf?version=1765809552","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032360810","display_name":"Dongxing Yu","orcid":"https://orcid.org/0000-0002-5976-1782"},"institutions":[{"id":"https://openalex.org/I3133083760","display_name":"Sanda University","ror":"https://ror.org/00tp01q71","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133083760"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dongxing Yu","raw_affiliation_strings":["School of Education, Sanda University, Shanghai 314100, China"],"raw_orcid":"https://orcid.org/0000-0002-5976-1782","affiliations":[{"raw_affiliation_string":"School of Education, Sanda University, Shanghai 314100, China","institution_ids":["https://openalex.org/I3133083760"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5032360810"],"corresponding_institution_ids":["https://openalex.org/I3133083760"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.9206,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81684053,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"16","issue":"12","first_page":"1106","last_page":"1106"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9531999826431274,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9531999826431274,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.008299999870359898,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.007499999832361937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.633899986743927},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.5396000146865845},{"id":"https://openalex.org/keywords/parallels","display_name":"Parallels","score":0.4973999857902527},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.4925999939441681},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4577000141143799},{"id":"https://openalex.org/keywords/computational-model","display_name":"Computational model","score":0.4142000079154968},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.4025000035762787},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3871999979019165},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.357699990272522},{"id":"https://openalex.org/keywords/soar","display_name":"Soar","score":0.3537999987602234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.730400025844574},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.633899986743927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5503000020980835},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.5396000146865845},{"id":"https://openalex.org/C2775922551","wikidata":"https://www.wikidata.org/wiki/Q7135033","display_name":"Parallels","level":2,"score":0.4973999857902527},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.4925999939441681},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4577000141143799},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4180000126361847},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.4025000035762787},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3871999979019165},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C17305859","wikidata":"https://www.wikidata.org/wiki/Q382944","display_name":"Soar","level":2,"score":0.3537999987602234},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33480000495910645},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.3231000006198883},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.31299999356269836},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C161407221","wikidata":"https://www.wikidata.org/wiki/Q4382939","display_name":"Cognitive model","level":3,"score":0.3077999949455261},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C108154423","wikidata":"https://www.wikidata.org/wiki/Q1469792","display_name":"Salience (neuroscience)","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C2993724205","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Human language","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C20854674","wikidata":"https://www.wikidata.org/wiki/Q4386060","display_name":"Cognitive architecture","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C51620047","wikidata":"https://www.wikidata.org/wiki/Q23399","display_name":"Memetics","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C2777393815","wikidata":"https://www.wikidata.org/wiki/Q662991","display_name":"Danaus","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.2565000057220459},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/info16121106","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16121106","pdf_url":"https://www.mdpi.com/2078-2489/16/12/1106/pdf?version=1765809552","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c76b9c7caa974363af7f31fc32852b9d","is_oa":true,"landing_page_url":"https://doaj.org/article/c76b9c7caa974363af7f31fc32852b9d","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 16, Iss 12, p 1106 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info16121106","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16121106","pdf_url":"https://www.mdpi.com/2078-2489/16/12/1106/pdf?version=1765809552","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321881","display_name":"Shanghai Municipal Education Commission","ror":"https://ror.org/05tewj457"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4417329376.pdf","grobid_xml":"https://content.openalex.org/works/W4417329376.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2013112874","https://openalex.org/W2066785847","https://openalex.org/W2089632738","https://openalex.org/W2091785129","https://openalex.org/W2123713131","https://openalex.org/W2204668899","https://openalex.org/W2526072446","https://openalex.org/W2579555219","https://openalex.org/W2589273976","https://openalex.org/W2951583631","https://openalex.org/W2962784628","https://openalex.org/W2963305465","https://openalex.org/W2963518342","https://openalex.org/W3035267217","https://openalex.org/W4406400870"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,12,50,106],"multimodal":[3,29],"large":[4],"language":[5],"models":[6,122],"(MLLMs)":[7],"have":[8],"demonstrated":[9],"remarkable":[10],"capabilities":[11],"processing":[13],"diverse":[14],"data":[15],"types,":[16],"yet":[17],"significant":[18,118],"disparities":[19],"persist":[20],"between":[21,41,158],"human":[22,42,56,84,159],"cognitive":[23,107],"processes":[24],"and":[25,46,102,145,161],"computational":[26],"approaches":[27],"to":[28,77,151],"information":[30,85],"integration.":[31],"This":[32],"research":[33],"presents":[34],"a":[35,89],"systematic":[36],"investigation":[37],"into":[38],"the":[39,79,152,156],"parallels":[40],"cross-modal":[43,94],"chunking":[44],"mechanisms":[45,104],"token":[47],"representation":[48],"methodologies":[49],"MLLMs.":[51],"Through":[52],"empirical":[53,166],"studies":[54],"comparing":[55],"performance":[57],"patterns":[58,144],"with":[59],"model":[60],"behaviors":[61],"across":[62],"visual\u2013linguistic":[63],"tasks,":[64],"we":[65],"demonstrate":[66,112],"that":[67,96,113],"conventional":[68],"static":[69],"tokenization":[70,95],"schemes":[71],"fundamentally":[72],"constrain":[73],"current":[74],"models\u2019":[75],"capacity":[76],"simulate":[78],"dynamic,":[80],"context-sensitive":[81],"nature":[82],"of":[83,155],"processing.":[86],"We":[87],"propose":[88],"novel":[90],"framework":[91],"for":[92,168],"dynamic":[93],"incorporates":[97],"adaptive":[98],"boundaries,":[99],"hierarchical":[100],"representations,":[101],"alignment":[103],"grounded":[105],"science":[108],"principles.":[109],"Quantitative":[110],"evaluations":[111],"our":[114],"approach":[115],"yields":[116],"statistically":[117],"improvements":[119],"over":[120],"state-of-the-art":[121],"on":[123,127,135],"benchmark":[124],"tasks":[125],"(+7.8%":[126],"Visual":[128],"Question":[129],"Answering":[130],"(p":[131],"&lt;":[132],"0.001),":[133],"5.3%":[134],"Complex":[136],"Scene":[137],"Description)":[138],"while":[139,164],"exhibiting":[140],"more":[141,170],"human-aligned":[142],"error":[143],"attention":[146],"distributions.":[147],"These":[148],"findings":[149],"contribute":[150],"theoretical":[153],"understanding":[154],"relationship":[157],"cognition":[160],"artificial":[162],"intelligence,":[163],"providing":[165],"evidence":[167],"developing":[169],"cognitively":[171],"plausible":[172],"AI":[173],"systems.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-12-15T00:00:00"}
