{"id":"https://openalex.org/W4406148671","doi":"https://doi.org/10.1145/3704323.3704362","title":"Large Language Model Data Augmentation for Text-Pair Classification Tasks","display_name":"Large Language Model Data Augmentation for Text-Pair Classification Tasks","publication_year":2024,"publication_date":"2024-10-25","ids":{"openalex":"https://openalex.org/W4406148671","doi":"https://doi.org/10.1145/3704323.3704362"},"language":"en","primary_location":{"id":"doi:10.1145/3704323.3704362","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704323.3704362","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704323.3704362?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3704323.3704362?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yuyang Li","orcid":"https://orcid.org/0009-0004-2625-6538"},"institutions":[{"id":"https://openalex.org/I3125743391","display_name":"China University of Geosciences (Beijing)","ror":"https://ror.org/04q6c7p66","country_code":"CN","type":"education","lineage":["https://openalex.org/I3125743391"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuyang Li","raw_affiliation_strings":["China University of Geosciences Beijing,School of Information Engineering, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-2625-6538","affiliations":[{"raw_affiliation_string":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China","institution_ids":["https://openalex.org/I3125743391"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030946611","display_name":"Yuqing Zhang","orcid":"https://orcid.org/0000-0001-9150-1369"},"institutions":[{"id":"https://openalex.org/I3125743391","display_name":"China University of Geosciences (Beijing)","ror":"https://ror.org/04q6c7p66","country_code":"CN","type":"education","lineage":["https://openalex.org/I3125743391"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqing Zhang","raw_affiliation_strings":["China University of Geosciences Beijing,School of Information Engineering, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9150-1369","affiliations":[{"raw_affiliation_string":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China","institution_ids":["https://openalex.org/I3125743391"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109684348","display_name":"Zelin Du","orcid":null},"institutions":[{"id":"https://openalex.org/I3125743391","display_name":"China University of Geosciences (Beijing)","ror":"https://ror.org/04q6c7p66","country_code":"CN","type":"education","lineage":["https://openalex.org/I3125743391"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zelin Du","raw_affiliation_strings":["China University of Geosciences Beijing,School of Information Engineering, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-2136-114X","affiliations":[{"raw_affiliation_string":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China","institution_ids":["https://openalex.org/I3125743391"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ziqi Guo","orcid":"https://orcid.org/0009-0002-0393-9884"},"institutions":[{"id":"https://openalex.org/I3125743391","display_name":"China University of Geosciences (Beijing)","ror":"https://ror.org/04q6c7p66","country_code":"CN","type":"education","lineage":["https://openalex.org/I3125743391"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqi Guo","raw_affiliation_strings":["China University of Geosciences Beijing,School of Information Engineering, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-0393-9884","affiliations":[{"raw_affiliation_string":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China","institution_ids":["https://openalex.org/I3125743391"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I3125743391"],"apc_list":null,"apc_paid":null,"fwci":0.3311,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69400971,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"427","last_page":"433"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8300870656967163},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6807252168655396},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.57342529296875},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5547741055488586},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.5284228920936584},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13306617736816406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8300870656967163},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6807252168655396},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.57342529296875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5547741055488586},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.5284228920936584},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13306617736816406}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3704323.3704362","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704323.3704362","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704323.3704362?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3704323.3704362","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704323.3704362","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704323.3704362?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4406148671.pdf","grobid_xml":"https://content.openalex.org/works/W4406148671.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W1840435438","https://openalex.org/W2550821151","https://openalex.org/W2594990650","https://openalex.org/W2747329762","https://openalex.org/W2889326796","https://openalex.org/W2923014074","https://openalex.org/W2946068894","https://openalex.org/W2963122608","https://openalex.org/W2963545917","https://openalex.org/W2964343359","https://openalex.org/W2971296908","https://openalex.org/W3037032032","https://openalex.org/W3099911888","https://openalex.org/W3103291112","https://openalex.org/W3106171756","https://openalex.org/W3114651185","https://openalex.org/W3174828871","https://openalex.org/W3196750896"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"large":[3],"language":[4,14,37],"models":[5,114],"(LLMs)":[6],"have":[7],"demonstrated":[8],"remarkable":[9],"capabilities":[10,112],"across":[11,119],"various":[12],"natural":[13,36],"processing":[15],"tasks.This":[16],"study":[17],"explores":[18],"the":[19,58,71,93,101,110,143],"application":[20],"of":[21,55,95,113,145],"LLMs":[22,45],"for":[23],"data":[24,60,73,104,118,129,137],"augmentation":[25,97,130],"in":[26],"text-pair":[27,146],"classification":[28,147],"tasks,":[29],"such":[30],"as":[31],"semantic":[32],"textual":[33],"similarity":[34],"and":[35,49,53,99,106,122,141],"inference.We":[38],"propose":[39],"a":[40,132],"novel":[41],"framework":[42],"that":[43,70,127],"leverages":[44],"to":[46,84,91,135],"generate":[47],"diverse":[48],"contextually":[50],"relevant":[51],"paraphrases":[52],"transformations":[54],"text-pairs,":[56],"enhancing":[57],"training":[59],"without":[61],"manual":[62],"annotation":[63],"effort.Our":[64],"experiments":[65],"on":[66],"widely-used":[67],"benchmarks":[68],"show":[69],"augmented":[72,117],"not":[74],"only":[75],"improves":[76],"model":[77],"performance":[78],"but":[79],"also":[80],"increases":[81],"its":[82],"robustness":[83],"out-of-domain":[85],"examples.We":[86],"perform":[87],"extensive":[88],"ablation":[89],"studies":[90],"understand":[92],"contribution":[94],"different":[96],"strategies":[98],"analyze":[100],"trade-offs":[102],"between":[103],"diversity":[105],"noise.Additionally,":[107],"we":[108],"assess":[109],"generalization":[111],"trained":[115],"with":[116],"multiple":[120],"architectures":[121],"dataset":[123],"sizes.The":[124],"results":[125],"suggest":[126],"LLM-driven":[128],"is":[131],"promising":[133],"approach":[134],"overcome":[136],"scarcity,":[138],"reduce":[139],"overfitting,":[140],"enhance":[142],"adaptability":[144],"systems.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
