{"id":"https://openalex.org/W2059719708","doi":"https://doi.org/10.1145/1835449.1835529","title":"Combining coregularization and consensus-based self-training for multilingual text categorization","display_name":"Combining coregularization and consensus-based self-training for multilingual text categorization","publication_year":2010,"publication_date":"2010-07-19","ids":{"openalex":"https://openalex.org/W2059719708","doi":"https://doi.org/10.1145/1835449.1835529","mag":"2059719708"},"language":"en","primary_location":{"id":"doi:10.1145/1835449.1835529","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1835449.1835529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://nrc-publications.canada.ca/eng/view/accepted/?id=cf783c37-e5ce-4280-a7cc-9e0b865245e7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111528497","display_name":"Massih R. Amini","orcid":null},"institutions":[{"id":"https://openalex.org/I197604219","display_name":"National Academies of Sciences, Engineering, and Medicine","ror":"https://ror.org/02eq2w707","country_code":"US","type":"government","lineage":["https://openalex.org/I197604219"]},{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA","US"],"is_corresponding":true,"raw_author_name":"Massih R. Amini","raw_affiliation_strings":["National Research Council Canada, Gatineau, PQ, Canada","National Research Council Canada, Gatineau, PQ, Canada#TAB#"],"affiliations":[{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada","institution_ids":["https://openalex.org/I4210159778"]},{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada#TAB#","institution_ids":["https://openalex.org/I197604219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065257553","display_name":"Cyril Goutte","orcid":"https://orcid.org/0000-0003-4939-6555"},"institutions":[{"id":"https://openalex.org/I197604219","display_name":"National Academies of Sciences, Engineering, and Medicine","ror":"https://ror.org/02eq2w707","country_code":"US","type":"government","lineage":["https://openalex.org/I197604219"]},{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Cyril Goutte","raw_affiliation_strings":["National Research Council Canada, Gatineau, PQ, Canada","National Research Council Canada, Gatineau, PQ, Canada#TAB#"],"affiliations":[{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada","institution_ids":["https://openalex.org/I4210159778"]},{"raw_affiliation_string":"National Research Council Canada, Gatineau, PQ, Canada#TAB#","institution_ids":["https://openalex.org/I197604219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084360449","display_name":"Nicolas Usunier","orcid":"https://orcid.org/0000-0002-9324-1457"},"institutions":[{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nicolas Usunier","raw_affiliation_strings":["Universit\u00e9 Pierre et Marie Curie (Paris 6), Paris, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Pierre et Marie Curie (Paris 6), Paris, France","institution_ids":["https://openalex.org/I39804081"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111528497"],"corresponding_institution_ids":["https://openalex.org/I197604219","https://openalex.org/I4210159778"],"apc_list":null,"apc_paid":null,"fwci":1.858,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.8761462,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"475","last_page":"482"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7850420475006104},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7007272243499756},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6598014831542969},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6589663624763489},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5593727827072144},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5386239290237427},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.510312020778656},{"id":"https://openalex.org/keywords/co-training","display_name":"Co-training","score":0.5008397102355957},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.4935849905014038},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4857800304889679},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4825519919395447},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.47113555669784546},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4488004148006439},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.34330153465270996},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.10173594951629639}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7850420475006104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7007272243499756},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6598014831542969},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6589663624763489},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5593727827072144},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5386239290237427},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.510312020778656},{"id":"https://openalex.org/C2776959682","wikidata":"https://www.wikidata.org/wiki/Q17005296","display_name":"Co-training","level":3,"score":0.5008397102355957},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.4935849905014038},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4857800304889679},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4825519919395447},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.47113555669784546},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4488004148006439},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.34330153465270996},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.10173594951629639},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/1835449.1835529","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1835449.1835529","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:cisti-icist.nrc-cnrc.ca:cistinparc:15469835","is_oa":true,"landing_page_url":"https://nrc-publications.canada.ca/eng/view/object/?id=cf783c37-e5ce-4280-a7cc-9e0b865245e7","pdf_url":"https://nrc-publications.canada.ca/eng/view/accepted/?id=cf783c37-e5ce-4280-a7cc-9e0b865245e7","source":{"id":"https://openalex.org/S7407055245","display_name":"NPARC","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.178.6505","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.178.6505","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-connex.lip6.fr/%7Eamini/Publis/MltViewSemisupDocCls_sigir10.pdf","raw_type":"text"},{"id":"pmh:oai:HAL:hal-01291883v1","is_oa":false,"landing_page_url":"https://hal.science/hal-01291883","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 33rd Annual ACM SIGIR Conference (SIGIR 2010), Jul 2010, Geneva, Switzerland. pp.475-482, &#x27E8;10.1145/1835449.1835529&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:cisti-icist.nrc-cnrc.ca:cistinparc:15469835","is_oa":true,"landing_page_url":"https://nrc-publications.canada.ca/eng/view/object/?id=cf783c37-e5ce-4280-a7cc-9e0b865245e7","pdf_url":"https://nrc-publications.canada.ca/eng/view/accepted/?id=cf783c37-e5ce-4280-a7cc-9e0b865245e7","source":{"id":"https://openalex.org/S7407055245","display_name":"NPARC","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8100000023841858,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2059719708.pdf","grobid_xml":"https://content.openalex.org/works/W2059719708.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W118545087","https://openalex.org/W1479807131","https://openalex.org/W1482214997","https://openalex.org/W1489959797","https://openalex.org/W1546961578","https://openalex.org/W1560143607","https://openalex.org/W1987756646","https://openalex.org/W2031823405","https://openalex.org/W2035720976","https://openalex.org/W2048679005","https://openalex.org/W2107008379","https://openalex.org/W2133348086","https://openalex.org/W2136504847","https://openalex.org/W2139578439","https://openalex.org/W2140676093","https://openalex.org/W2142742813","https://openalex.org/W2145234365","https://openalex.org/W2145765191","https://openalex.org/W2597289420","https://openalex.org/W2798766386","https://openalex.org/W4244633107","https://openalex.org/W6628905179","https://openalex.org/W6680140577","https://openalex.org/W6681427677","https://openalex.org/W6750230808"],"related_works":["https://openalex.org/W2133556223","https://openalex.org/W1520691178","https://openalex.org/W2186473728","https://openalex.org/W4312414840","https://openalex.org/W34092691","https://openalex.org/W2891078859","https://openalex.org/W192740413","https://openalex.org/W2131153761","https://openalex.org/W60792937","https://openalex.org/W2059598258"],"abstract_inverted_index":{"We":[0,20,52,116],"investigate":[1],"the":[2,25,38,79,83,127,130,135,144,170],"problem":[3,23],"of":[4,27,37,78,89,129,132,143],"learning":[5,44,66,137],"document":[6],"classifiers":[7,75],"in":[8,24,45,67,93,169],"a":[9,87,111,118,140],"multilingual":[10,141],"setting,":[11],"from":[12,49,104],"collections":[13],"where":[14,30,176],"labels":[15,101],"are":[16,92,160,178],"only":[17],"partially":[18],"available.":[19,186],"address":[21],"this":[22,123],"framework":[26],"multiview":[28,63],"learning,":[29],"different":[31,35,68,73,150],"languages":[32],"correspond":[33],"to":[34,47],"views":[36,133,180],"same":[39],"document,":[40],"combined":[41],"with":[42],"semi-supervised":[43,65,136],"order":[46],"benefit":[48],"unlabeled":[50,90,106],"documents.":[51],"rely":[53],"on":[54,76,110,134,139],"two":[55],"techniques,":[56],"coregularization":[57,156],"and":[58,64,99,125,157,162,172,182],"consensus-based":[59,158],"self-training,":[60],"that":[61,82,155,163],"combine":[62],"ways.":[69],"Our":[70,152],"approach":[71],"trains":[72],"monolingual":[74],"each":[77],"views,":[80],"such":[81],"classifiers'":[84],"decisions":[85],"over":[86],"set":[88,108],"examples":[91,103],"agreement":[94],"as":[95,97],"much":[96],"possible,":[98],"iteratively":[100],"new":[102],"another":[105],"training":[107,120],"based":[109],"consensus":[112],"across":[113],"language-specific":[114],"classifiers.":[115],"derive":[117],"boosting-based":[119],"algorithm":[121],"for":[122],"task,":[124],"analyze":[126],"impact":[128],"number":[131],"results":[138],"extension":[142],"Reuters":[145],"RCV1/RCV2":[146],"corpus":[147],"using":[148],"five":[149],"languages.":[151],"experiments":[153],"show":[154],"self-training":[159],"complementary":[161],"their":[164],"combination":[165],"is":[166],"especially":[167],"effective":[168],"interesting":[171],"very":[173],"common":[174],"situation":[175],"there":[177],"few":[179,183],"(languages)":[181],"labeled":[184],"documents":[185]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
