{"id":"https://openalex.org/W4416339311","doi":"https://doi.org/10.48550/arxiv.2511.10690","title":"Saying the Unsaid: Revealing the Hidden Language of Multimodal Systems Through Telephone Games","display_name":"Saying the Unsaid: Revealing the Hidden Language of Multimodal Systems Through Telephone Games","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416339311","doi":"https://doi.org/10.48550/arxiv.2511.10690"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2511.10690","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.10690","pdf_url":"https://arxiv.org/pdf/2511.10690","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2511.10690","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100615377","display_name":"Jun Zhao","orcid":"https://orcid.org/0000-0001-9724-7226"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhao, Juntu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100612041","display_name":"Jialing Zhang","orcid":"https://orcid.org/0000-0001-5484-3511"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jialing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072905534","display_name":"Chongxuan Li","orcid":"https://orcid.org/0000-0002-0912-9076"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chongxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101424814","display_name":"Dequan Wang","orcid":"https://orcid.org/0000-0003-0877-4636"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Dequan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100615377"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6582000255584717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6582000255584717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09989999979734421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.057100001722574234,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6480000019073486},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.5958999991416931},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.49399998784065247},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.44609999656677246},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.423799991607666},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4205000102519989},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.4146000146865845},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4083000123500824}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7105000019073486},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6480000019073486},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.5958999991416931},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5152999758720398},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.49399998784065247},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.48829999566078186},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.44609999656677246},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.423799991607666},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4205000102519989},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.3765999972820282},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3264999985694885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29910001158714294},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C2777152325","wikidata":"https://www.wikidata.org/wiki/Q108163","display_name":"Proposition","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2511.10690","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.10690","pdf_url":"https://arxiv.org/pdf/2511.10690","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2511.10690","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.10690","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2511.10690","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.10690","pdf_url":"https://arxiv.org/pdf/2511.10690","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"closed-source":[1],"multimodal":[2,106,150,192,210,225],"systems":[3,193,211],"have":[4],"made":[5],"great":[6],"advances,":[7],"but":[8],"their":[9,20,34],"hidden":[10,35,207],"language":[11,208],"for":[12,171,216],"understanding":[13,104],"the":[14,28,38,42,57,66,69,76,87,98,103,122,197,206,214,220],"world":[15],"remains":[16],"opaque":[17],"because":[18],"of":[19,40,90,105,117,124,146,209,224],"black-box":[21],"architectures.":[22],"In":[23],"this":[24,83],"paper,":[25],"we":[26,95,140,154,176],"use":[27,177],"systems'":[29,58,151],"preference":[30,60,157],"bias":[31,61,158],"to":[32,80,179],"study":[33,200],"language:":[36],"During":[37],"process":[39],"compressing":[41],"input":[43,71],"images":[44],"(typically":[45],"containing":[46],"multiple":[47],"concepts)":[48],"into":[49,55],"texts":[50],"and":[51,166,187,195,212,222],"then":[52],"reconstructing":[53],"them":[54],"images,":[56],"inherent":[59],"introduces":[62],"specific":[63],"shifts":[64],"in":[65,92,102,149],"outputs,":[67],"disrupting":[68],"original":[70],"concept":[72,99,119,147,173,182],"co-occurrence.":[73],"We":[74,111],"employ":[75],"multi-round":[77],"\"telephone":[78],"game\"":[79],"strategically":[81],"leverage":[82],"bias.":[84],"By":[85,135],"observing":[86],"co-occurrence":[88],"frequencies":[89],"concepts":[91],"telephone":[93,126,130,138],"games,":[94,139],"quantitatively":[96],"investigate":[97],"connection":[100],"strength":[101],"systems,":[107],"i.e.,":[108],"\"hidden":[109],"language.\"":[110],"also":[112],"contribute":[113],"Telescope,":[114],"a":[115,143,202],"dataset":[116],"10,000+":[118],"pairs,":[120],"as":[121],"database":[123],"our":[125],"game":[127,131],"framework.":[128],"Our":[129],"is":[132],"test-time":[133],"scalable:":[134],"iteratively":[136],"running":[137],"can":[141,155],"construct":[142],"global":[144],"map":[145],"connections":[148],"understanding.":[152],"Here":[153],"identify":[156],"inherited":[159],"from":[160],"training,":[161],"assess":[162],"generalization":[163],"capability":[164],"advancement,":[165],"discover":[167],"more":[168],"stable":[169],"pathways":[170],"fragile":[172],"connections.":[174],"Furthermore,":[175],"Reasoning-LLMs":[178],"uncover":[180],"unexpected":[181],"relationships":[183],"that":[184],"transcend":[185],"textual":[186],"visual":[188],"similarities,":[189],"inferring":[190],"how":[191],"understand":[194],"simulate":[196],"world.":[198],"This":[199],"offers":[201],"new":[203],"perspective":[204],"on":[205,219],"lays":[213],"foundation":[215],"future":[217],"research":[218],"interpretability":[221],"controllability":[223],"systems.":[226]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-18T00:00:00"}
