{"id":"https://openalex.org/W2962770186","doi":"https://doi.org/10.18653/v1/d15-1179","title":"Fast, Flexible Models for Discovering Topic Correlation across Weakly-Related Collections","display_name":"Fast, Flexible Models for Discovering Topic Correlation across Weakly-Related Collections","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2962770186","doi":"https://doi.org/10.18653/v1/d15-1179","mag":"2962770186"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d15-1179","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d15-1179","pdf_url":"https://www.aclweb.org/anthology/D15-1179.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D15-1179.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100434263","display_name":"Jingwei Zhang","orcid":"https://orcid.org/0000-0002-1681-6608"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jingwei Zhang","raw_affiliation_strings":["Department of Computer Science, Columbia University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016270163","display_name":"Aaron Gerow","orcid":"https://orcid.org/0000-0002-8279-1322"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron Gerow","raw_affiliation_strings":["Computation Institute, University of Chicago"],"affiliations":[{"raw_affiliation_string":"Computation Institute, University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109267551","display_name":"Jaan Altosaar","orcid":null},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaan Altosaar","raw_affiliation_strings":["Department of Physics, Princeton University"],"affiliations":[{"raw_affiliation_string":"Department of Physics, Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111597343","display_name":"James M. B. Evans","orcid":null},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Evans","raw_affiliation_strings":["Computation Institute, University of Chicago","Department of Sociology, University of Chicago"],"affiliations":[{"raw_affiliation_string":"Computation Institute, University of Chicago","institution_ids":["https://openalex.org/I40347166"]},{"raw_affiliation_string":"Department of Sociology, University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049740369","display_name":"Richard Jean So","orcid":"https://orcid.org/0009-0002-6442-055X"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Jean So","raw_affiliation_strings":["Department of English Language and Literature, University of Chicago"],"affiliations":[{"raw_affiliation_string":"Department of English Language and Literature, University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100434263"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.62860183,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.86617464,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1554","last_page":"1564"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6756632328033447},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.46497344970703125},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3759458661079407},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1177511215209961}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6756632328033447},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.46497344970703125},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3759458661079407},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1177511215209961},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d15-1179","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d15-1179","pdf_url":"https://www.aclweb.org/anthology/D15-1179.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d15-1179","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d15-1179","pdf_url":"https://www.aclweb.org/anthology/D15-1179.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306193","display_name":"John Templeton Foundation","ror":"https://ror.org/035tnyy05"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2962770186.pdf","grobid_xml":"https://content.openalex.org/works/W2962770186.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W58017990","https://openalex.org/W203054622","https://openalex.org/W1149064364","https://openalex.org/W1609010894","https://openalex.org/W1831355885","https://openalex.org/W1866637071","https://openalex.org/W1880262756","https://openalex.org/W1982474113","https://openalex.org/W2041517243","https://openalex.org/W2052261215","https://openalex.org/W2053725995","https://openalex.org/W2061806977","https://openalex.org/W2103587173","https://openalex.org/W2107743791","https://openalex.org/W2112247328","https://openalex.org/W2115529864","https://openalex.org/W2116137244","https://openalex.org/W2122172122","https://openalex.org/W2130339025","https://openalex.org/W2144100511","https://openalex.org/W2147946282","https://openalex.org/W2150731624","https://openalex.org/W2151447176","https://openalex.org/W2158266063","https://openalex.org/W2172135926","https://openalex.org/W2402203825","https://openalex.org/W2521521051","https://openalex.org/W4233135949","https://openalex.org/W4251068582","https://openalex.org/W6602366756"],"related_works":["https://openalex.org/W2384888906","https://openalex.org/W2376314740","https://openalex.org/W2366644548","https://openalex.org/W2357241418","https://openalex.org/W2119214692","https://openalex.org/W2119135658","https://openalex.org/W2115485936","https://openalex.org/W2153015554","https://openalex.org/W2146184373","https://openalex.org/W2103338134"],"abstract_inverted_index":{"Weak":[0],"topic":[1,19,25,58],"correlation":[2],"across":[3],"document":[4],"collections":[5,13,50,124],"with":[6],"different":[7],"numbers":[8,71],"of":[9,56,72,118,125],"topics":[10],"in":[11,48,53,123],"individual":[12],"presents":[14],"challenges":[15],"for":[16,79],"existing":[17],"cross-collection":[18],"models.This":[20],"paper":[21],"introduces":[22],"two":[23],"probabilistic":[24],"models,":[26],"Correlated":[27,31],"LDA":[28],"(C-LDA)":[29],"and":[30,43,93,127],"HDP":[32],"(C-HDP).These":[33],"address":[34],"problems":[35],"that":[36],"can":[37],"arise":[38],"when":[39],"analyzing":[40],"large,":[41],"asymmetric,":[42],"potentially":[44],"weakly-related":[45,49],"collections.Topic":[46],"correlations":[47],"typically":[51],"lie":[52],"the":[54,57,91],"tail":[55,78],"distribution,":[59],"where":[60],"they":[61],"would":[62],"be":[63],"overlooked":[64],"by":[65],"models":[66,83,100],"unable":[67],"to":[68,108],"fit":[69],"large":[70],"topics.To":[73],"efficiently":[74],"model":[75],"this":[76],"long":[77],"large-scale":[80],"analysis,":[81],"our":[82],"implement":[84],"a":[85,115],"parallel":[86],"sampling":[87],"algorithm":[88],"based":[89],"on":[90,104],"Metropolis-Hastings":[92],"alias":[94],"methods":[95],"(Yuan":[96],"et":[97],"al.,":[98],"2015).The":[99],"are":[101],"first":[102],"evaluated":[103],"synthetic":[105],"data,":[106],"generated":[107],"simulate":[109],"various":[110],"collection-level":[111],"asymmetries.We":[112],"then":[113],"present":[114],"case":[116],"study":[117],"modeling":[119],"over":[120],"300k":[121],"documents":[122],"sciences":[126],"humanities":[128],"research":[129],"from":[130],"JSTOR.":[131]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
