From 74022b7a6a78a37570e2d9255284d83e51281465 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Fri, 19 Jun 2026 18:22:31 +0200 Subject: [PATCH] feat: Expand the jar checks to stricter agent jar validation * Maintains the size check. * Verify some required entries * Ensure there's a minimum number of classes in the whole jar * Ensure products are correctly included and have at least one class * Light size check on the indexes * Fixed list of packages that should not appear in the jar * Run checks as part of the build job, to catch issues earlier --- .gitlab-ci.yml | 2 +- dd-java-agent/build.gradle | 113 ++++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eee9eadfc68..c31f251ef52 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -336,7 +336,7 @@ build: script: - if [ $CI_PIPELINE_SOURCE == "schedule" ] ; then ./gradlew resolveAndLockAll --write-locks $GRADLE_ARGS; fi - ./gradlew --version - - ./gradlew clean :dd-java-agent:shadowJar :dd-trace-api:jar :dd-trace-ot:shadowJar -PskipTests $GRADLE_ARGS + - ./gradlew clean :dd-java-agent:shadowJar :dd-java-agent:check :dd-trace-api:jar :dd-trace-ot:shadowJar -PskipTests -x spotlessCheck $GRADLE_ARGS - echo UPSTREAM_TRACER_VERSION=$(java -jar workspace/dd-java-agent/build/libs/*.jar) >> upstream.env - echo "BUILD_JOB_NAME=$CI_JOB_NAME" >> build.env - echo "BUILD_JOB_ID=$CI_JOB_ID" >> build.env diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle index a2001cb4ee8..ae78df4410c 100644 --- a/dd-java-agent/build.gradle +++ b/dd-java-agent/build.gradle @@ -1,5 +1,6 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar import java.util.concurrent.atomic.AtomicBoolean +import java.util.jar.JarFile plugins { id 'com.gradleup.shadow' @@ -19,6 +20,10 @@ configurations { def includedAgentDir = project.layout.buildDirectory.dir("generated/included") def includedJarFileTree = fileTree(includedAgentDir) +// Populated automatically by includeShadowJar for every product dir registered in this build. +// Used by verifyAgentJarContents to check that all included products land in the assembled jar. +ext.includedProductPrefixes = objects.setProperty(String) + def pomPropertiesDir = project.layout.buildDirectory.dir("generated/maven-metadata") def pomPropertiesFileTree = fileTree(pomPropertiesDir) @@ -183,6 +188,8 @@ def generalShadowJarConfig(ShadowJar shadowJarTask) { } def includeShadowJar(TaskProvider includedShadowJarTask, String agentDir, FileTree includedJarFileTree) { + includedProductPrefixes.add(agentDir) + def expandTask = project.tasks.register("expandAgentShadowJar${agentDir.capitalize()}", Sync) { it.group = LifecycleBasePlugin.BUILD_GROUP it.description = "Expand the included shadow jar into the agent jar under ${agentDir}" @@ -468,16 +475,108 @@ tasks.withType(Test).configureEach { dependsOn "shadowJar" } -tasks.register('checkAgentJarSize') { +tasks.register('verifyAgentJarContents') { + group = LifecycleBasePlugin.VERIFICATION_GROUP + description = 'Verify the agent jar contains required entries and meets structural invariants' + + def jarProvider = tasks.named('shadowJar', ShadowJar).flatMap { it.archiveFile } + inputs.file(jarProvider) + inputs.property('productPrefixes', includedProductPrefixes) + outputs.file(project.layout.buildDirectory.file("tmp/${it.name}/.verified")) + doLast { - // Arbitrary limit to prevent unintentional increases to the agent jar size - // Raise or lower as required - assert tasks.named("shadowJar", ShadowJar).get().archiveFile.get().getAsFile().length() <= 33 * 1024 * 1024 - } + File jarFile = jarProvider.get().asFile + List failures = [] + Map entries = [:] + + // Jar size ceiling — raise only when the growth is intentional + def sizeCeiling = 33L * 1024 * 1024 + if (jarFile.length() > sizeCeiling) { + failures << "Jar size ${jarFile.length()} B exceeds ceiling ${sizeCeiling} B (33 MiB)" + } - dependsOn "shadowJar" + // Inspect jar content + new JarFile(jarFile).withCloseable { jf -> + jf.entries().each { ze -> entries[ze.name] = ze.size } + } + + // Required entries + [ + // Runtime index, loaded at startup to resolve classdata paths + // Generated by :dd-java-agent:generateAgentJarIndex + 'dd-java-agent.index', + // Premain-Class: Java 6 pre check + 'datadog/trace/bootstrap/AgentPreCheck.class', + // Agent-Class: main bootstrap entry point + 'datadog/trace/bootstrap/AgentBootstrap.class', + // Additional checks for Java 11 + 'datadog/trace/bootstrap/AdvancedAgentChecks.class', + // Instrumentation indexes + // * :dd-java-agent:instrumentation:generateInstrumenterIndex + // * :dd-java-agent:instrumentation:generateKnownTypesIndex + // Without instrumenter.index, zero instrumentations load at runtime. + 'inst/instrumenter.index', + 'inst/known-types.index', + // OTel drop-in support, embedded via otel-bootstrap + otel-shim shadowInclude + 'datadog/trace/bootstrap/otel/api/', + 'datadog/trace/bootstrap/otel/context/', + 'datadog/trace/bootstrap/otel/shim/', + 'META-INF/maven/com.datadoghq/dd-java-agent/pom.properties', + ].each { required -> + if (!entries.containsKey(required)) { + failures << "Missing required entry: ${required}" + } + } + + // Sanity check on the minimum number of classes; update as needed. Set to about 98% of that number. + def classCount = entries.keySet().count { it.endsWith('.class') || it.endsWith('.classdata') } + def classFloor = 17_000 // a bit moe than 98% of 17,279 at time of writing + if (classCount < classFloor) { + failures << "Class count ${classCount} is below floor ${classFloor}" + } + + // Each registered product must contribute at least one .classdata entry. + // Catches a product wired into the build but producing no classes. + def classdataPrefixes = entries.keySet() + .findAll { it.endsWith('.classdata') } + .collect { it.split('/')[0] } + .toSet() + includedProductPrefixes.get().each { dir -> + if (!classdataPrefixes.contains(dir)) { + failures << "Product '${dir}' has no .classdata entries in the assembled jar" + } + } + + // All *.index files in the jar must be non-empty + entries.findAll { name, size -> name.endsWith('.index') && size == 0 }.each { name, _ -> + failures << "Empty index file: ${name}" + } + + // Packages that must not appear anywhere in the jar after relocation. + // NOTE: Hardcoded to catch accidental removal of relocate() calls in generalShadowJarConfig or in a nested shadow jar. + def productPrefixes = includedProductPrefixes.get() + ['org/slf4j/', 'org/jctools/', 'net/jpountz/', 'org/objectweb/asm/', 'io/airlift/'].each { pkg -> + def leaked = entries.keySet().findAll { entry -> + entry.startsWith(pkg) || productPrefixes.any { prefix -> entry.startsWith("${prefix}/${pkg}") } + } + if (!leaked.empty) { + def sample = leaked.take(3).toString() + failures << "Unrelocated package '${pkg}': ${sample}${leaked.size() > 3 ? ' ...' : ''}" + } + } + + if (!failures.empty) { + throw new GradleException( + "Agent jar verification failed (${failures.size()} issue(s)):\n" + + failures.collect { " - ${it}" }.join('\n')) + } + + def marker = outputs.files.singleFile + marker.parentFile.mkdirs() + marker.text = 'verified' + } } tasks.named('check') { - dependsOn 'checkAgentJarSize' + dependsOn 'verifyAgentJarContents' }