{"id":"https://openalex.org/W4403791871","doi":"https://doi.org/10.1145/3664647.3680614","title":"Cross-Task Knowledge Transfer for Semi-supervised Joint 3D Grounding and Captioning","display_name":"Cross-Task Knowledge Transfer for Semi-supervised Joint 3D Grounding and Captioning","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791871","doi":"https://doi.org/10.1145/3664647.3680614"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680614","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680614","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101426086","display_name":"Yang Liu","orcid":"https://orcid.org/0009-0008-0319-7239"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078220957","display_name":"Daizong Liu","orcid":"https://orcid.org/0000-0001-8179-4508"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daizong Liu","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001396675","display_name":"Zongming Guo","orcid":"https://orcid.org/0000-0002-4944-9621"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongming Guo","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059045087","display_name":"Wei Hu","orcid":"https://orcid.org/0000-0002-9860-0922"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Hu","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, State Key Laboratory of Multimedia Information Processing, Peking University, Beijing, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101426086"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":1.745,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86338617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3818","last_page":"3827"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7746224999427795},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7666720151901245},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.7230486273765564},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7155097723007202},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4991300106048584},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4645198583602905},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.45503920316696167},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.44088250398635864},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.41743776202201843},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39774537086486816},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39026114344596863},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3600318729877472},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14983677864074707},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.13993105292320251},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.06175658106803894},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.05716264247894287},{"id":"https://openalex.org/keywords/architectural-engineering","display_name":"Architectural engineering","score":0.05083131790161133}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7746224999427795},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7666720151901245},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.7230486273765564},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7155097723007202},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4991300106048584},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4645198583602905},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.45503920316696167},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.44088250398635864},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.41743776202201843},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39774537086486816},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39026114344596863},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3600318729877472},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14983677864074707},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.13993105292320251},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.06175658106803894},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.05716264247894287},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.05083131790161133},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680614","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680614","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1895577753","https://openalex.org/W1956340063","https://openalex.org/W2250539671","https://openalex.org/W2594519801","https://openalex.org/W2600463316","https://openalex.org/W2770046775","https://openalex.org/W2963727135","https://openalex.org/W2988715931","https://openalex.org/W3034602892","https://openalex.org/W3035057392","https://openalex.org/W3103830808","https://openalex.org/W3133833192","https://openalex.org/W3140398265","https://openalex.org/W3173770676","https://openalex.org/W3175234951","https://openalex.org/W3178218920","https://openalex.org/W3182910454","https://openalex.org/W3183392001","https://openalex.org/W3203949114","https://openalex.org/W3206171352","https://openalex.org/W3216468633","https://openalex.org/W4214684415","https://openalex.org/W4221167937","https://openalex.org/W4312385518","https://openalex.org/W4312565984","https://openalex.org/W4312605608","https://openalex.org/W4312650216","https://openalex.org/W4312749817","https://openalex.org/W4312852845","https://openalex.org/W4312955495","https://openalex.org/W4313030842","https://openalex.org/W4313162371","https://openalex.org/W4385819979","https://openalex.org/W4386071469","https://openalex.org/W4386075583","https://openalex.org/W4386075930","https://openalex.org/W4386076628","https://openalex.org/W4387969417","https://openalex.org/W4389352409","https://openalex.org/W4390872002","https://openalex.org/W4390872240","https://openalex.org/W4390873695","https://openalex.org/W4391216149","https://openalex.org/W4393156188","https://openalex.org/W4403792400"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4399485860"],"abstract_inverted_index":{"3D":[0,22,100,113,120,151,158],"visual":[1],"grounding":[2,101,121,159,171,223,266],"is":[3,118],"a":[4,16,20,27,34,53,72,78,95,109,149,253],"fundamental":[5],"yet":[6],"important":[7],"task":[8,32],"in":[9,19,56,162,277],"multimedia":[10],"understanding,":[11],"which":[12],"aims":[13],"to":[14,26,68,134,197,230,281,297],"locate":[15],"specific":[17],"object":[18,136],"complicated":[21],"scene":[23,88],"semantically":[24],"according":[25],"text":[28,138],"description.":[29],"However,":[30],"this":[31,57,60,62],"requires":[33],"large":[35],"number":[36],"of":[37,39,48,181,201,236,248],"annotations":[38],"labeled":[40,187],"text-object":[41,80],"pairs":[42],"for":[43,98,140,156,189,239],"training,":[44],"so":[45],"the":[46,65,163,178,182,194,199,202,211,218,222,234,246,249,262,288,299],"scarcity":[47,301],"annotated":[49],"data":[50,89,133,188,238,300],"has":[51,90],"been":[52],"key":[54],"obstacle":[55],"task.":[58],"To":[59,243],"end,":[61],"paper":[63],"makes":[64],"first":[66,176],"attempt":[67],"introduce":[69],"and":[70,105,122,126,137,160,172,205,224,232,265,284],"address":[71],"new":[73,96],"semi-supervised":[74,164],"setting,":[75,165],"where":[76,166],"only":[77],"few":[79],"labels":[81],"are":[82,124],"provided":[83],"during":[84],"training.":[85],"Considering":[86],"most":[87],"no":[91],"annotation,":[92],"we":[93,147,192,216,251],"explore":[94],"solution":[97,296],"unlabeled":[99,132,212,237],"by":[102],"additionally":[103],"training":[104],"transferring":[106],"knowledge":[107,220],"from":[108],"correlated":[110],"task,":[111],"i.e.,":[112],"captioning.":[114],"Our":[115],"main":[116],"insight":[117],"that":[119],"captioning":[123,161,173,225],"complementary":[125],"can":[127,291],"be":[128],"iteratively":[129,206],"trained":[130],"with":[131,143,185,210],"provide":[135],"contexts":[139],"each":[141,167],"other":[142],"pseudo-label":[144,255],"learning.":[145],"Specifically,":[146],"propose":[148],"novel":[150],"Cross-Task":[152],"Teacher-Student":[153],"Framework":[154],"(3D-CTTSF)":[155],"joint":[157],"branch":[168,184,196],"contains":[169],"parallel":[170],"modules.":[174],"We":[175],"pre-train":[177],"two":[179,228],"modules":[180,226],"teacher":[183,203],"limited":[186],"warm-up.":[190],"Then,":[191],"train":[193],"student":[195],"mimic":[198],"ability":[200],"model":[204],"update":[207],"both":[208,278],"branches":[209,229],"data.":[213],"In":[214],"particular,":[215],"transfer":[217],"learned":[219],"between":[221],"across":[227],"generate":[231],"refine":[233],"pseudo-labels":[235,260],"providing":[240],"reliable":[241],"supervision.":[242],"further":[244],"improve":[245],"quality":[247],"pseudo-labels,":[250],"design":[252],"cross-task":[254],"generation":[256],"scheme,":[257],"filtering":[258],"low-quality":[259],"at":[261],"detection,":[263],"captioning,":[264],"levels,":[267],"respectively.":[268],"Experimental":[269],"results":[270],"on":[271],"various":[272],"datasets":[273],"show":[274],"competitive":[275],"performances":[276],"tasks":[279],"compared":[280],"previous":[282],"fully-":[283],"weakly-supervised":[285],"methods,":[286],"demonstrating":[287],"proposed":[289],"3D-CTTSF":[290],"serve":[292],"as":[293],"an":[294],"effective":[295],"overcome":[298],"issue.":[302]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
