{"id":"https://openalex.org/W2895990696","doi":"https://doi.org/10.1017/s1351324918000359","title":"MUSED: A multimedia multi-document dataset for topic segmentation","display_name":"MUSED: A multimedia multi-document dataset for topic segmentation","publication_year":2018,"publication_date":"2018-10-22","ids":{"openalex":"https://openalex.org/W2895990696","doi":"https://doi.org/10.1017/s1351324918000359","mag":"2895990696"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324918000359","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324918000359","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066439336","display_name":"Pedro Mota","orcid":"https://orcid.org/0000-0002-1003-5640"},"institutions":[{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["PT","US"],"is_corresponding":false,"raw_author_name":"PEDRO MOTA","raw_affiliation_strings":["Instituto Superior T\u00e9cnico, Carnegie Mellon University, Rua Alves Redol 9, Lisbon 1000-029, Portugal e-mail:","INSTITUTO SUPERIOR TECNICO"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Superior T\u00e9cnico, Carnegie Mellon University, Rua Alves Redol 9, Lisbon 1000-029, Portugal e-mail:","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"INSTITUTO SUPERIOR TECNICO","institution_ids":["https://openalex.org/I4387152517"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077285164","display_name":"Maxine Esk\u00e9nazi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"MAXINE ESKENAZI","raw_affiliation_strings":["Carnegie Mellon University, 6413 Gates Hillman Complex, 5000 Forbes Ave, Pittsburgh, PA 15213, USA e-mail:","Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, 6413 Gates Hillman Complex, 5000 Forbes Ave, Pittsburgh, PA 15213, USA e-mail:","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067178556","display_name":"Lu\u00edsa Coheur","orcid":"https://orcid.org/0000-0002-2456-5028"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"LU\u00cdSA COHEUR","raw_affiliation_strings":["Instituto Superior T\u00e9cnico, Rua Alves Redol 9, Lisbon 1000-029, Portugal e-mail:","INSTITUTO SUPERIOR TECNICO"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Superior T\u00e9cnico, Rua Alves Redol 9, Lisbon 1000-029, Portugal e-mail:","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"INSTITUTO SUPERIOR TECNICO","institution_ids":["https://openalex.org/I4387152517"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.11364622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"24","issue":"6","first_page":"921","last_page":"946"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8961289525032043},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7892118096351624},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6079659461975098},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5629023909568787},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.45952942967414856},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.4564746022224426},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.38004326820373535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2666013240814209}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8961289525032043},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7892118096351624},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6079659461975098},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5629023909568787},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.45952942967414856},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.4564746022224426},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.38004326820373535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2666013240814209},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324918000359","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324918000359","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W116782006","https://openalex.org/W801269729","https://openalex.org/W887185921","https://openalex.org/W1554540371","https://openalex.org/W1558643924","https://openalex.org/W1582534817","https://openalex.org/W1592823049","https://openalex.org/W1626945812","https://openalex.org/W1669912781","https://openalex.org/W1777978449","https://openalex.org/W1828401780","https://openalex.org/W1862888253","https://openalex.org/W1983814883","https://openalex.org/W1985741469","https://openalex.org/W2015933299","https://openalex.org/W2022677886","https://openalex.org/W2027823133","https://openalex.org/W2043406630","https://openalex.org/W2053154970","https://openalex.org/W2067002391","https://openalex.org/W2069207503","https://openalex.org/W2089094722","https://openalex.org/W2092062917","https://openalex.org/W2106918957","https://openalex.org/W2113385376","https://openalex.org/W2118612506","https://openalex.org/W2124585778","https://openalex.org/W2128709346","https://openalex.org/W2130695933","https://openalex.org/W2133943399","https://openalex.org/W2137320444","https://openalex.org/W2141403362","https://openalex.org/W2144092076","https://openalex.org/W2148818577","https://openalex.org/W2153222072","https://openalex.org/W2154593091","https://openalex.org/W2159083595","https://openalex.org/W2162021827","https://openalex.org/W2165232124","https://openalex.org/W2169142063","https://openalex.org/W2172125983","https://openalex.org/W2250539671","https://openalex.org/W2251295945","https://openalex.org/W2303837218","https://openalex.org/W2304421473","https://openalex.org/W2596567068","https://openalex.org/W2750725664","https://openalex.org/W2792932412","https://openalex.org/W2796728447","https://openalex.org/W6622717037","https://openalex.org/W6637289302","https://openalex.org/W6638575021"],"related_works":["https://openalex.org/W2592395359","https://openalex.org/W2045342254","https://openalex.org/W2535231171","https://openalex.org/W2142182663","https://openalex.org/W1501331687","https://openalex.org/W4255512592","https://openalex.org/W2501551404","https://openalex.org/W2326647871","https://openalex.org/W4205247302","https://openalex.org/W2468652214"],"abstract_inverted_index":{"Abstract":[0],"Research":[1],"on":[2,8],"topic":[3,149,197],"segmentation":[4,150,198,234],"has":[5],"recently":[6],"focused":[7],"segmenting":[9],"documents":[10,15,31,44,57,87,104,168,212],"by":[11],"taking":[12],"advantage":[13,157],"of":[14,29,42,50,86,101,114,158,166,179,192,211],"covering":[16],"the":[17,40,51,76,112,147,153,164,174,177,184,209],"same":[18],"topics.":[19],"In":[20,47,107,140],"order":[21],"to":[22,69,123,129,145,203,225,242,248],"properly":[23],"evaluate":[24],"such":[25],"approaches,":[26],"a":[27,84,189,243],"dataset":[28,186],"related":[30,43,103,159],"is":[32,120,127,183],"needed.":[33],"However,":[34],"existing":[35],"datasets":[36,53],"are":[37],"limited":[38],"in":[39,94,173,208],"number":[41],"per":[45,105],"domain.":[46,106],"addition,":[48,141],"most":[49],"available":[52],"do":[54,236],"not":[55,237],"consider":[56],"from":[58,90,169,213],"different":[59,91,96,170,214,226],"media":[60,92,171,215,227],"sources":[61,172],"(PowerPoints,":[62],"videos,":[63],"etc.),":[64],"which":[65],"pose":[66],"specific":[67],"challenges":[68],"segmentation.":[70],"We":[71],"fill":[72],"this":[73,108],"gap":[74],"with":[75,98],"MU":[77],"ltimedia":[78],"SE":[79],"gmentation":[80],"D":[81],"ataset":[82],"(MUSED),":[83],"collection":[85],"manually":[88],"segmented,":[89],"sources,":[93,228],"seven":[95],"domains,":[97],"an":[99],"average":[100],"twenty":[102],"paper,":[109],"we":[110,142,162],"describe":[111],"process":[113],"building":[115],"MUSED.":[116],"A":[117],"multi-annotator":[118],"study":[119,163,204],"carried":[121],"out":[122],"determine":[124],"if":[125],"it":[126],"possible":[128],"observe":[130],"agreement":[131],"among":[132],"human":[133],"judges":[134],"and":[135,195,229],"characterize":[136],"their":[137],"disagreement":[138],"patterns.":[139],"use":[143],"MUSED":[144,182],"compare":[146],"state-of-the-art":[148],"techniques,":[151,199],"including":[152],"ones":[154],"that":[155,187,219,231,246],"take":[156],"documents.":[160],"Moreover,":[161],"impact":[165],"having":[167],"dataset.":[175],"To":[176],"best":[178],"our":[180],"knowledge,":[181],"first":[185],"allows":[188],"straightforward":[190],"evaluation":[191],"both":[193],"single-":[194],"multiple-documents":[196],"as":[200,202],"well":[201],"how":[205],"these":[206],"behave":[207],"presence":[210],"sources.":[216],"Results":[217],"show":[218],"some":[220],"techniques":[221],"are,":[222],"indeed,":[223],"sensitive":[224],"also":[230],"current":[232],"multi-document":[233],"models":[235],"outperform":[238],"previous":[239],"models,":[240],"pointing":[241],"research":[244],"line":[245],"needs":[247],"be":[249],"boosted.":[250]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
