{"id":"https://openalex.org/W4385262477","doi":"https://doi.org/10.48550/arxiv.2307.12981","title":"3D-LLM: Injecting the 3D World into Large Language Models","display_name":"3D-LLM: Injecting the 3D World into Large Language Models","publication_year":2023,"publication_date":"2023-07-24","ids":{"openalex":"https://openalex.org/W4385262477","doi":"https://doi.org/10.48550/arxiv.2307.12981"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2307.12981","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.12981","pdf_url":"https://arxiv.org/pdf/2307.12981","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2307.12981","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000069468","display_name":"Yining Hong","orcid":"https://orcid.org/0000-0002-0518-2099"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hong, Yining","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109625651","display_name":"Haoyu Zhen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhen, Haoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004837552","display_name":"Peihao Chen","orcid":"https://orcid.org/0000-0002-6847-1621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Peihao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075364910","display_name":"Shuhong Zheng","orcid":"https://orcid.org/0000-0003-3160-9532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Shuhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101182304","display_name":"Yilun Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Yilun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030471889","display_name":"Zhenfang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhenfang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5040877128","display_name":"Chuang Gan","orcid":"https://orcid.org/0000-0003-4031-5886"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Chuang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5000069468"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9605000019073486,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7428988218307495},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7237958908081055},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5366714596748352},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4987952709197998},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4464412331581116},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.41681450605392456},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4110785722732544},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38798287510871887},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10826751589775085},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08736279606819153}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7428988218307495},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7237958908081055},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5366714596748352},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4987952709197998},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4464412331581116},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.41681450605392456},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4110785722732544},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38798287510871887},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10826751589775085},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08736279606819153},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2307.12981","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.12981","pdf_url":"https://arxiv.org/pdf/2307.12981","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2307.12981","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2307.12981","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2307.12981","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.12981","pdf_url":"https://arxiv.org/pdf/2307.12981","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W3098003361"],"abstract_inverted_index":{"Large":[0],"language":[1,61],"models":[2,23,62],"(LLMs)":[3],"and":[4,46,63,78,83,105,208,235],"Vision-Language":[5],"Models":[6],"(VLMs)":[7],"have":[8],"been":[9],"proven":[10],"to":[11,54,120,157],"excel":[12],"at":[13],"multiple":[14],"tasks,":[15,90],"such":[16,39],"as":[17,21,40,81,154],"commonsense":[18],"reasoning.":[19],"Powerful":[20],"these":[22,127],"can":[24,73,168],"be,":[25],"they":[26],"are":[27,118],"not":[28],"grounded":[29],"in":[30],"the":[31,56,189,230],"3D":[32,57,75,95,100,137,142,164,171,204],"physical":[33],"world,":[34],"which":[35],"involves":[36],"richer":[37],"concepts":[38],"spatial":[41,172],"relationships,":[42],"affordances,":[43],"physics,":[44],"layout,":[45],"so":[47,106],"on.":[48,107],"In":[49],"this":[50],"work,":[51],"we":[52,115,117,133,150],"propose":[53],"inject":[55],"world":[58],"into":[59],"large":[60,186],"introduce":[64],"a":[65,85,136,163,185],"whole":[66],"new":[67],"family":[68],"of":[69,88,111,232],"3D-LLMs.":[70,160],"Specifically,":[71],"3D-LLMs":[72,167],"take":[74],"point":[76],"clouds":[77],"their":[79],"features":[80,143],"input":[82],"perform":[84,226],"diverse":[86],"set":[87],"3D-related":[89],"including":[91],"captioning,":[92,94,205],"dense":[93],"question":[96],"answering,":[97],"task":[98,206],"decomposition,":[99],"grounding,":[101],"3D-assisted":[102,209],"dialog,":[103],"navigation,":[104],"Using":[108],"three":[109],"types":[110],"prompting":[112],"mechanisms":[113],"that":[114,140,178,212,222],"design,":[116],"able":[119],"collect":[121],"over":[122],"300k":[123],"3D-language":[124],"data":[125],"covering":[126],"tasks.":[128],"To":[129],"efficiently":[130],"train":[131,158],"3D-LLMs,":[132],"first":[134],"utilize":[135],"feature":[138],"extractor":[139],"obtains":[141],"from":[144],"rendered":[145],"multi-":[146],"view":[147],"images.":[148],"Then,":[149],"use":[151],"2D":[152,216],"VLMs":[153],"our":[155,159,179,200,213,223],"backbones":[156],"By":[161],"introducing":[162],"localization":[165],"mechanism,":[166],"better":[169],"capture":[170],"information.":[173],"Experiments":[174],"on":[175,199],"ScanQA":[176],"show":[177,211,221],"model":[180,214,224],"outperforms":[181,215],"state-of-the-art":[182,193],"baselines":[183],"by":[184,195],"margin":[187],"(e.g.,":[188],"BLEU-1":[190],"score":[191,194],"surpasses":[192],"9%).":[196],"Furthermore,":[197],"experiments":[198],"held-in":[201],"datasets":[202],"for":[203],"composition,":[207],"dialogue":[210],"VLMs.":[217,236],"Qualitative":[218],"examples":[219],"also":[220],"could":[225],"more":[227],"tasks":[228],"beyond":[229],"scope":[231],"existing":[233],"LLMs":[234],"Project":[237],"Page:":[238],":":[239],"https://vis-www.cs.umass.edu/3dllm/.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":4}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
