{"id":"https://openalex.org/W2981750465","doi":"https://doi.org/10.1145/3343031.3350879","title":"Exploiting Temporal Relationships in Video Moment Localization with Natural Language","display_name":"Exploiting Temporal Relationships in Video Moment Localization with Natural Language","publication_year":2019,"publication_date":"2019-10-15","ids":{"openalex":"https://openalex.org/W2981750465","doi":"https://doi.org/10.1145/3343031.3350879","mag":"2981750465"},"language":"en","primary_location":{"id":"doi:10.1145/3343031.3350879","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350879","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350879","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350879","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100747794","display_name":"Songyang Zhang","orcid":"https://orcid.org/0000-0003-4316-3320"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Songyang Zhang","raw_affiliation_strings":["University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066326238","display_name":"Jinsong Su","orcid":"https://orcid.org/0000-0001-5606-7122"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinsong Su","raw_affiliation_strings":["Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055469774","display_name":"Jiebo Luo","orcid":"https://orcid.org/0000-0002-4516-9729"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiebo Luo","raw_affiliation_strings":["University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100747794"],"corresponding_institution_ids":["https://openalex.org/I5388228"],"apc_list":null,"apc_paid":null,"fwci":5.0612,"has_fulltext":false,"cited_by_count":70,"citation_normalized_percentile":{"value":0.96346937,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1230","last_page":"1238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.823235809803009},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6464889645576477},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6283503770828247},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5991198420524597},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5367041826248169},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5211279988288879},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4731382429599762},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4348374307155609},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.42316585779190063},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4194630980491638},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.41184505820274353},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11981135606765747},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.10906374454498291}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.823235809803009},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6464889645576477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6283503770828247},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5991198420524597},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5367041826248169},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5211279988288879},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4731382429599762},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4348374307155609},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.42316585779190063},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4194630980491638},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.41184505820274353},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11981135606765747},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.10906374454498291},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3343031.3350879","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350879","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350879","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3343031.3350879","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350879","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350879","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.699999988079071}],"awards":[{"id":"https://openalex.org/G4283415030","display_name":null,"funder_award_id":"1704337","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4319893781","display_name":null,"funder_award_id":"1722847","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5926384816","display_name":null,"funder_award_id":"1813709","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2981750465.pdf","grobid_xml":"https://content.openalex.org/works/W2981750465.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W560619751","https://openalex.org/W1512475734","https://openalex.org/W1686810756","https://openalex.org/W2104246439","https://openalex.org/W2172196656","https://openalex.org/W2250539671","https://openalex.org/W2507009361","https://openalex.org/W2538147493","https://openalex.org/W2558535589","https://openalex.org/W2611788449","https://openalex.org/W2626778328","https://openalex.org/W2796869301","https://openalex.org/W2798354744","https://openalex.org/W2798786641","https://openalex.org/W2890502146","https://openalex.org/W2891456603","https://openalex.org/W2891574165","https://openalex.org/W2894280539","https://openalex.org/W2895583029","https://openalex.org/W2897628926","https://openalex.org/W2903901502","https://openalex.org/W2952524542","https://openalex.org/W2962764817","https://openalex.org/W2962869524","https://openalex.org/W2963017553","https://openalex.org/W2963084773","https://openalex.org/W2963662190","https://openalex.org/W2964121744","https://openalex.org/W2964232540","https://openalex.org/W2964345792","https://openalex.org/W4210424844"],"related_works":["https://openalex.org/W2375873920","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W2392760275","https://openalex.org/W2083530853","https://openalex.org/W2982905616","https://openalex.org/W2009831055","https://openalex.org/W2393172683","https://openalex.org/W2368686738","https://openalex.org/W2389383932"],"abstract_inverted_index":{"We":[0],"address":[1],"the":[2,29,41,75,90,100,105,141,150,154],"problem":[3],"of":[4,128,140],"video":[5,14],"moment":[6],"localization":[7],"with":[8,72],"natural":[9,19],"language,":[10],"i.e.":[11],"localizing":[12],"a":[13,18,32,52,60,67],"segment":[15,98],"described":[16],"by":[17,137],"language":[20],"sentence.":[21],"While":[22],"most":[23],"prior":[24],"work":[25],"focuses":[26],"on":[27,113,153],"grounding":[28],"query":[30],"as":[31],"whole,":[33],"temporal":[34,81],"dependencies":[35],"and":[36,80,93,99,108],"reasoning":[37],"between":[38,96],"events":[39],"within":[40],"text":[42],"are":[43,85],"not":[44],"fully":[45],"considered.":[46],"In":[47],"this":[48],"paper,":[49],"we":[50,120],"propose":[51],"novel":[53],"Temporal":[54],"Compositional":[55],"Modular":[56],"Network":[57],"(TCMN)":[58],"where":[59,131],"tree":[61],"attention":[62],"network":[63],"first":[64],"automatically":[65],"decomposes":[66],"sentence":[68],"into":[69],"three":[70],"descriptions":[71],"respect":[73],"to":[74,88,124],"main":[76,106],"event,":[77],"context":[78,109],"event":[79,107,110],"signal.":[82],"Two":[83],"modules":[84],"then":[86],"utilized":[87],"measure":[89],"visual":[91,142],"similarity":[92,95],"location":[94],"each":[97,132],"decomposed":[101],"descriptions.":[102],"Moreover,":[103],"since":[104],"may":[111],"rely":[112],"different":[114],"modalities":[115],"(RGB":[116],"or":[117],"optical":[118],"flow),":[119],"use":[121],"late":[122],"fusion":[123],"form":[125],"an":[126],"ensemble":[127],"four":[129],"models,":[130],"model":[133,148],"is":[134],"independently":[135],"trained":[136],"one":[138],"combination":[139],"input.":[143],"Experiments":[144],"show":[145],"that":[146],"our":[147],"outperforms":[149],"state-of-the-art":[151],"methods":[152],"TEMPO":[155],"dataset.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":23},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
