{"id":"https://openalex.org/W7161964952","doi":"https://doi.org/10.1145/3786167.3788430","title":"A Catalogue of Evaluation Metrics for LLM-Based Multi-Agent Frameworks in Software Engineering","display_name":"A Catalogue of Evaluation Metrics for LLM-Based Multi-Agent Frameworks in Software Engineering","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7161964952","doi":"https://doi.org/10.1145/3786167.3788430"},"language":null,"primary_location":{"id":"doi:10.1145/3786167.3788430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788430","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3786167.3788430","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136645548","display_name":"Ingrid Lima","orcid":"https://orcid.org/0009-0004-2443-5188"},"institutions":[{"id":"https://openalex.org/I102939073","display_name":"Universidade Estadual do Cear\u00e1","ror":"https://ror.org/00sec1m50","country_code":"BR","type":"education","lineage":["https://openalex.org/I102939073"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Ingrid Lima","raw_affiliation_strings":["State University of Cear\u00e1, Brazil, Fortaleza, Brazil"],"raw_orcid":"https://orcid.org/0009-0004-2443-5188","affiliations":[{"raw_affiliation_string":"State University of Cear\u00e1, Brazil, Fortaleza, Brazil","institution_ids":["https://openalex.org/I102939073"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136709851","display_name":"Vitor Fontenele de Oliveira Linhares","orcid":"https://orcid.org/0009-0006-3377-4573"},"institutions":[{"id":"https://openalex.org/I102939073","display_name":"Universidade Estadual do Cear\u00e1","ror":"https://ror.org/00sec1m50","country_code":"BR","type":"education","lineage":["https://openalex.org/I102939073"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Vitor Linhares","raw_affiliation_strings":["State University of Cear\u00e1, Brazil, Fortaleza, Brazil"],"raw_orcid":"https://orcid.org/0009-0006-3377-4573","affiliations":[{"raw_affiliation_string":"State University of Cear\u00e1, Brazil, Fortaleza, Brazil","institution_ids":["https://openalex.org/I102939073"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015456641","display_name":"Anderson Martins Gomes","orcid":null},"institutions":[{"id":"https://openalex.org/I102939073","display_name":"Universidade Estadual do Cear\u00e1","ror":"https://ror.org/00sec1m50","country_code":"BR","type":"education","lineage":["https://openalex.org/I102939073"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Anderson Martins Gomes","raw_affiliation_strings":["State University of Cear\u00e1, Brazil, Fortaleza, Brazil"],"raw_orcid":"https://orcid.org/0009-0001-3910-3707","affiliations":[{"raw_affiliation_string":"State University of Cear\u00e1, Brazil, Fortaleza, Brazil","institution_ids":["https://openalex.org/I102939073"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5136721751","display_name":"Paulo Henrique M. Maia","orcid":"https://orcid.org/0000-0002-6683-6869"},"institutions":[{"id":"https://openalex.org/I102939073","display_name":"Universidade Estadual do Cear\u00e1","ror":"https://ror.org/00sec1m50","country_code":"BR","type":"education","lineage":["https://openalex.org/I102939073"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Paulo Henrique Maia","raw_affiliation_strings":["State University of Ceara, Brazil, Fortaleza, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-6683-6869","affiliations":[{"raw_affiliation_string":"State University of Ceara, Brazil, Fortaleza, Brazil","institution_ids":["https://openalex.org/I102939073"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.83488978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"120","last_page":"124"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.5105999708175659,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.5105999708175659,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.14159999787807465,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.09690000116825104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4521999955177307},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.44429999589920044},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4068000018596649},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.33489999175071716},{"id":"https://openalex.org/keywords/software-metric","display_name":"Software metric","score":0.3319000005722046},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.3188999891281128},{"id":"https://openalex.org/keywords/software-quality-assurance","display_name":"Software quality assurance","score":0.3043000102043152}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7085000276565552},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.583899974822998},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4521999955177307},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.44429999589920044},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3571000099182129},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.33489999175071716},{"id":"https://openalex.org/C82214349","wikidata":"https://www.wikidata.org/wiki/Q657339","display_name":"Software metric","level":5,"score":0.3319000005722046},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.3188999891281128},{"id":"https://openalex.org/C2776969324","wikidata":"https://www.wikidata.org/wiki/Q613918","display_name":"Software quality assurance","level":5,"score":0.3043000102043152},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C14224292","wikidata":"https://www.wikidata.org/wiki/Q13600188","display_name":"Conceptual framework","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.2971999943256378},{"id":"https://openalex.org/C204983608","wikidata":"https://www.wikidata.org/wiki/Q2111958","display_name":"Productivity","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3786167.3788430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788430","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3786167.3788430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788430","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.4242388904094696},{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4148225486278534}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W4406325768","https://openalex.org/W4406698707","https://openalex.org/W4411549450","https://openalex.org/W4411950659","https://openalex.org/W4412888577","https://openalex.org/W4412888697","https://openalex.org/W4412944767"],"related_works":[],"abstract_inverted_index":{"LLM-based":[0],"Multi-Agent":[1],"(LMA)":[2],"frameworks":[3,58,149,157],"have":[4],"recently":[5],"gained":[6],"traction":[7],"in":[8,34,115],"software":[9,159],"engineering,":[10],"attracting":[11],"attention":[12],"due":[13,39],"to":[14,17,40],"their":[15,62],"potential":[16],"enhance":[18],"productivity":[19],"by":[20,94],"automating":[21],"tasks":[22],"such":[23],"as":[24],"code":[25],"generation,":[26],"testing,":[27],"and":[28,56,73,100,129,145,150],"quality":[29],"assurance.":[30],"However,":[31],"evaluation":[32,91],"practices":[33],"this":[35,84],"area":[36],"remain":[37],"fragmented":[38],"the":[41,67,71,152],"lack":[42],"of":[43,70,90,119,155],"standardised":[44],"methodologies.":[45],"Frameworks":[46],"often":[47],"rely":[48],"on":[49],"self-defined":[50],"or":[51],"inconsistent":[52],"metrics,":[53,107],"hindering":[54],"reproducibility":[55],"making":[57],"appear":[59],"optimal":[60],"within":[61],"own":[63],"settings,":[64],"which":[65],"obscures":[66],"true":[68],"state":[69],"art":[72],"can":[74],"produce":[75],"artificially":[76],"inflated":[77],"performance":[78],"results.":[79],"To":[80],"address":[81],"these":[82],"challenges,":[83],"study":[85],"conducts":[86],"a":[87,116,134],"comprehensive":[88],"analysis":[89],"metrics":[92,121],"used":[93],"state-of-the-art":[95],"LMA":[96,140,156],"frameworks,":[97],"revealing":[98],"inconsistencies":[99],"conceptual":[101],"gaps.":[102],"We":[103],"propose":[104],"12":[105],"novel":[106],"combine":[108],"them":[109],"with":[110],"26":[111],"existing":[112],"ones,":[113],"resulting":[114],"structured":[117,135],"catalogue":[118],"38":[120],"across":[122],"four":[123],"technical":[124],"categories:":[125],"Outcome,":[126],"Process,":[127],"Product,":[128],"Framework.":[130],"These":[131],"contributions":[132],"provide":[133],"foundation":[136],"for":[137,158],"rigorous,":[138],"reproducible":[139],"framework":[141],"evaluation,":[142],"enabling":[143],"direct":[144],"meaningful":[146],"comparisons":[147],"between":[148],"supporting":[151],"systematic":[153],"advancement":[154],"engineering.":[160]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-22T00:00:00"}
