制品库磁盘占用

要查看制品库的磁盘占用，需要先开启Nexus的脚本执行功能，然后通过Task运行脚本来统计各制品库的磁盘占用

开启脚本执行功能

在 <nexus_home>/sonatype-work/etc/nexus.properties 文件中新增如下配置项，然后重启Nexus（注意：Nexus 3默认安装时，该文件通常位于 sonatype-work/nexus3/etc/ 目录下，请以实际的数据目录为准）

nexus.scripts.allowCreation=true

创建脚本执行任务

脚本内容如下，该脚本由官方提供：

      
/*
 * Sonatype Nexus (TM) Open Source Version
 * Copyright (c) 2008-present Sonatype, Inc.
 * All rights reserved. Includes the third-party code listed at http://links.sonatype.com/products/nexus/oss/attributions.
 *
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License Version 1.0,
 * which accompanies this distribution and is available at http://www.eclipse.org/legal/epl-v10.html.
 *
 * Sonatype Nexus (TM) Professional Version is available from Sonatype, Inc. "Sonatype" and "Sonatype Nexus" are trademarks
 * of Sonatype, Inc. Apache Maven is a trademark of the Apache Software Foundation. M2eclipse is a trademark of the
 * Eclipse Foundation. All other trademarks are the property of their respective owners.
*/

/*
 * Utility script that scans blobstores and reads the asset properties files within to summarize which repositories
 * are using the blob store, and how much space each is consuming and how much space could potentially be reclaimed by
 * running a compact blobstore task.
 *
 * The script retrieves the blobstore locations from the Nexus system and also all defined repositories.
 *
 * It is possible to specify a whitelist of repository names *OR* a blacklist (whitelist takes priority)
 * If a whitelist is provided, only those repositories whitelisted will be included.
 * If a blacklist is provided (and no whitelist), any repositories that are blacklisted will be omitted.
 *
 * Any empty repositories are also included.
 *
 * The script tabulates both the total size, and the size that could be reclaimed by performing a compact blob store
 * task.
 *
 * Script was developed to run as an 'Execute Script' task within Nexus Repository Manager.
 *
 * ==== CHANGE LOG ====
 * May 10, 2022
 * - fix Windows path matching
 * May 9, 2022
 * - fix for proper rethrow handling exception caused by failed properties file processing
 * - reduce heap memory by not putting all paths to blob properties files into List object
 * - improve file path matcher patterns to exclude non .properties files and only include files under ./content
 */

 /* ---------------- BEGIN CONFIGURABLE SECTION -------------*

 * Whitelist - a list of repository names that should be the only items included.
 *   When this list is non-empty the blacklist below is ignored.
 *
 *   For example: REPOSITORY_WHITELIST = ['maven-central', 'npm-hosted']
 *
 *   NOTE: assigned without 'def' or a type on purpose, so that the value is a script-wide
 *   variable that passesWhiteBlackList() can read.
 */

REPOSITORY_WHITELIST = []

/* Blacklist - a list of repository names that should not be included.
 *   This will only apply if REPOSITORY_WHITELIST is not set (i.e. it is empty).
 *
 *   For example: REPOSITORY_BLACKLIST = ['maven-central', 'npm-hosted']
 *
 *   NOTE: assigned without 'def' or a type for the same reason as REPOSITORY_WHITELIST.
 */

REPOSITORY_BLACKLIST = []

/* ---------------- END CONFIGURABLE SECTION ---------------*/

import groovy.json.JsonOutput
import java.nio.file.FileSystems
import java.nio.file.Path
import java.nio.file.PathMatcher
import java.text.SimpleDateFormat
import org.slf4j.LoggerFactory
import org.sonatype.nexus.common.app.ApplicationDirectories
import org.sonatype.nexus.internal.app.ApplicationDirectoriesImpl

import static groovy.io.FileType.FILES

// Logger for this script, named after the script's own class.
def log = LoggerFactory.getLogger(this.class)

// Look up the Nexus directory component through the container made available to the script.
def directoriesComponent = container.lookup(ApplicationDirectoriesImpl.class.name)
ApplicationDirectories applicationDirectories = (ApplicationDirectories) directoriesComponent

// Blob store name -> blob store root directory; populated by the blob store discovery loop.
Map<String,File> blobStoreDirectories = new LinkedHashMap<String, File>()

// Both flags are left undeclared so that passesWhiteBlackList() can read them.
// The blacklist only counts when no whitelist has been configured.
hasWhitelist = !REPOSITORY_WHITELIST.isEmpty()
hasBlacklist = !(hasWhitelist || REPOSITORY_BLACKLIST.isEmpty())

// Path separator in the form it has to take inside a glob pattern.
String SEP = FileSystems.getDefault().getSeparator()
if (SEP == '\\') {
  // escape back slashes on windows so path matchers work correctly
  SEP = SEP * 2
  log.info("Treating file system as using Windows path separators.")
}

// Files inside a blob store that are never blob property files (metadata, metrics, payloads,
// temp files, deletion index, macOS folder metadata).
def EXCLUDE_PATTERNS = 'glob:{' + [
    "**${SEP}metadata.properties",
    "**${SEP}*metrics.properties",
    "**${SEP}*.bytes",
    "**${SEP}tmp*",
    "**${SEP}*deletions.index",
    "**${SEP}*.DS_Store"
].join(',') + '}'
log.info("Global Blobstore exclude patterns: {}", EXCLUDE_PATTERNS)
PathMatcher EXCLUDE_MATCHER = FileSystems.getDefault().getPathMatcher(EXCLUDE_PATTERNS)


// Default location of results is the Nexus temporary directory
File resultsFileLocation = applicationDirectories.getTemporaryDirectory()

// Collected statistics, one BlobStatistics entry per scanned blob store.
// A plain map is used on purpose: a default of 0 would silently insert an Integer into a map
// declared to hold BlobStatistics whenever an absent key was looked up.
Map<String, BlobStatistics> blobStatCollection = [:]

/**
 * Totals gathered for a single blob store, plus the per-repository breakdown.
 */
class BlobStatistics {
  /** Number of blob property files that carried no repository name. */
  int totalRepoNameMissingCount = 0

  /** Sum of the sizes of all counted blobs (direct-path blobs are not added here). */
  long totalBlobStoreBytes = 0L

  /** Sum of the sizes of counted blobs that are marked as deleted. */
  long totalReclaimableBytes = 0L

  /** Repository name -> statistics for that repository. */
  Map<String, RepoStatistics> repositories = new LinkedHashMap<String, RepoStatistics>()
}

/**
 * Byte counters for one repository inside a blob store.
 */
class RepoStatistics
{
  /** Sum of the sizes of every blob attributed to the repository. */
  long totalBytes = 0L

  /** Portion of totalBytes that belongs to blobs marked as deleted. */
  long reclaimableBytes = 0L
}

/**
 * Adds the blob described by one properties file to the statistics of its blob store.
 *
 * @param blobstat       statistics of the blob store being scanned; updated in place
 * @param unmapped       repository names that must not be counted
 * @param properties     contents of the blob's properties file
 * @param propertiesFile the file the properties were read from; only used in log messages
 */
def collectMetrics(final BlobStatistics blobstat, Set<String> unmapped,
                   final Properties properties, final File propertiesFile) {
  def repo = properties.'@Bucket.repo-name'
  if (repo == null && properties.'@BlobStore.direct-path') {
    // blobs without a repository name but with a direct path go to a pseudo repository
    repo = 'SYSTEM:direct-path'
  }

  if (repo == null) {
    // unexpected - log the unexpected condition, but stop logging after the first occurrences
    if (blobstat.totalRepoNameMissingCount <= 50) {
      log.warn('Repository name missing from {} : {}', propertiesFile.absolutePath, properties)
      log.info('full details: {}', properties)
    }
    blobstat.totalRepoNameMissingCount++
    return
  }

  boolean directPath = repo == 'SYSTEM:direct-path'

  // First sighting of a repository that is neither pre-registered nor excluded: start tracking it.
  if (!blobstat.repositories.containsKey(repo) && !unmapped.contains(repo)) {
    if (!directPath) {
      log.info('Found unknown repository in {}: {}', propertiesFile.absolutePath, repo)
    }
    blobstat.repositories.put(repo as String, new RepoStatistics())
  }

  // Excluded repositories never get an entry, so there is nothing to count for them.
  if (!blobstat.repositories.containsKey(repo)) {
    return
  }

  RepoStatistics repoStats = blobstat.repositories.get(repo)
  long blobBytes = properties.size as long

  repoStats.totalBytes += blobBytes
  if (!directPath) {
    // direct-path blobs are tracked per pseudo repository only, not in the store totals
    blobstat.totalBlobStoreBytes += blobBytes
  }

  if (properties.'deleted') {
    repoStats.reclaimableBytes += blobBytes
    if (!directPath) {
      blobstat.totalReclaimableBytes += blobBytes
    }
  }
}

/**
 * Decides whether a repository takes part in the report.
 *
 * @param name repository name
 * @return with a whitelist: whether the name is whitelisted; otherwise with a blacklist: whether
 *         the name is absent from it; true when neither list is in effect
 */
def passesWhiteBlackList(final String name) {
  if (hasWhitelist) {
    return REPOSITORY_WHITELIST.contains(name)
  }
  return !(hasBlacklist && REPOSITORY_BLACKLIST.contains(name))
}

// Blob store name -> (repository name -> whether the repository passes the white/black list).
// Inner maps are created on first access.
Map<String, Map<String, Boolean>> storeRepositoryLookup =
    [:].withDefault { new LinkedHashMap<String, Boolean>() }

for (repo in repository.getRepositoryManager().browse()) {
  def storageAttributes = repo.properties.configuration.attributes.storage
  storeRepositoryLookup.get(storageAttributes.blobStoreName)
      .put(repo.name, passesWhiteBlackList(repo.name))
}

// Collect the root directory of every blob store that can be scanned on the local file system.
blobStore.getBlobStoreManager().browse().each { blobstore ->
  // getProperties() is evaluated once per blob store instead of once per access.
  def storeProperties = blobstore.getProperties()
  def storeConfiguration = storeProperties.get("blobStoreConfiguration")

  //check that this blobstore is not a group (3.15.0+)
  if (!storeProperties.getOrDefault('groupable', true)) {
    log.info("Ignoring blobstore {} as it is a group store", storeConfiguration.name)
    return
  }

  //S3 stores currently cannot be analysed via this script, so ignore (3.12.0+)
  if (storeConfiguration.type == "S3") {
    log.info("Ignoring blobstore {} as it is using S3", storeConfiguration.name)
    return
  }

  try {
    // local variable: the name must not leak into the script binding
    def storeName = storeConfiguration.name
    blobStoreDirectories[storeName] = storeProperties.get("absoluteBlobDir").toFile()
  }
  catch (Exception ex) {
    // best effort: a store that does not expose a usable directory is reported and left out
    log.warn('Unable to add blobstore {} of type {}: {}',
        storeConfiguration.name, storeConfiguration.type, ex.getMessage())
    log.info('details: {}', storeProperties)
  }
}

// Walk the content directory of every collected blob store and accumulate per-repository sizes.
log.info('Blob Storage scan STARTED.')
blobStoreDirectories.each { blobStore ->
  // key = blob store name, value = blob store root directory
  String blobStoreName = blobStore.key
  File blobStoreDir = blobStore.value
  Path contentDir = blobStoreDir.toPath().resolve('content')
  log.info('Scanning blobstore {}, root dir {}, content dir {}', blobStoreName, blobStoreDir.absolutePath, contentDir)

  BlobStatistics blobStat = new BlobStatistics()

  // Pre-register the repositories configured for this blob store. The lookup is keyed by blob
  // store NAME, which need not equal the directory name, so the map key is used here.
  Set<String> unmapped = new HashSet<>()
  storeRepositoryLookup[blobStoreName].each { repoName, included ->
    if (included) {
      blobStat.repositories.put(repoName, new RepoStatistics())
    } else {
      unmapped.add(repoName)
    }
  }

  // The include pattern matches paths on disk, so it is built from the directory name.
  def includePattern = "glob:**${SEP}${blobStoreDir.getName()}${SEP}content${SEP}**${SEP}*.properties"
  PathMatcher INCLUDE_MATCHER = FileSystems.getDefault().getPathMatcher(includePattern)
  log.info("Looking for blob properties files matching: ${includePattern}")
  contentDir.eachFileRecurse(FILES) { p ->
    if (EXCLUDE_MATCHER.matches(p) || !INCLUDE_MATCHER.matches(p)) {
      return
    }
    File propertiesFile = p.toFile()
    def properties = new Properties()
    try {
      propertiesFile.withInputStream { is ->
        properties.load(is)
      }
    } catch (FileNotFoundException ex) {
      // The file disappeared between listing and reading. Really skip it, otherwise the empty
      // properties would be counted as a blob with a missing repository name.
      log.warn("File not found '{}', skipping", propertiesFile.getCanonicalPath())
      return
    } catch (Exception e) {
      log.error('Unable to process {}', propertiesFile.getAbsolutePath(), e)
      throw e
    }
    collectMetrics(blobStat, unmapped, properties, propertiesFile)
  }
  blobStatCollection.put(blobStoreName, blobStat)
}

// Take the direct-path pseudo repository out of each blob store's statistics and log its size.
for (storeEntry in blobStatCollection) {
  RepoStatistics directPath = storeEntry.value.repositories.remove('SYSTEM:direct-path')
  if (directPath == null) {
    continue
  }
  log.info("Direct-Path size in blobstore {}: {} - reclaimable: {}",
      storeEntry.key, directPath.totalBytes, directPath.reclaimableBytes)
}

// Write the report as pretty-printed JSON into a timestamped file.
def reportTimestamp = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date())
def filename = "repoSizes-${reportTimestamp}.json"
File resultsFile = new File(resultsFileLocation, filename)
resultsFile.withWriter { Writer writer ->
  // keep only blob stores that have at least one repository entry ...
  def storesWithRepositories = blobStatCollection.findAll { a, b -> !b.repositories.isEmpty() }
  // ... and list the largest blob store first
  def largestFirst = storesWithRepositories
      .toSorted { a, b -> b.value.totalBlobStoreBytes <=> a.value.totalBlobStoreBytes }
  writer << JsonOutput.prettyPrint(JsonOutput.toJson(largestFirst))
}
log.info('Blob Storage scan ENDED. Report at {}', resultsFile.absolutePath)

有了这个脚本后，我们可以创建一个脚本执行任务去执行该脚本

最后执行该任务就行

查看执行结果

脚本执行的结果可以在Log Viewer看到，此处就是提示了占用分析报告的存放位置

同样，分析上述的Groovy脚本可以发现，脚本最后输出的正是上图中Log Viewer标红的那条日志：

那么我们打开分析报告后，如下所示：

清除磁盘占用

上述的磁盘占用结果有个字段“reclaimableBytes”
Nexus的官方解释：
For each repository, totalBytes indicates how much space is being used and reclaimableBytes indicates how much space may be reclaimed by running the Compact Blob Store maintenance task.
也就是说，reclaimableBytes统计的是已被标记为删除、但仍然占用磁盘的旧制品内容（可能是旧的版本，或者不再使用的内容），这部分空间可以通过Task（Compact Blob Store）回收。这个任务是Nexus自带的，可以在管理员页面找到，具体操作就不再详述。

这些旧的内容主要来自以下源头：

已删除的制品库残留： 上述结果还提到了一个库“npm_ruijie”，这是很早被删除的库，但是删库的动作并不会清除制品哦，制品仍会残留并占用磁盘。
现存的制品库旧制品残留： 不仅如此，现存的库同样有可回收的残留，以代理库“npm_proxy_ruijie”来说，每次拉取制品（同版本但内容不同），并不是在覆盖旧的制品缓存。旧的缓存都是属于“reclaimableBytes”的。

目录CONTENT

Nexus的磁盘清理以及制品库占用分析

制品库磁盘占用

开启脚本执行功能

创建脚本执行任务

查看执行结果

清除磁盘占用

评论区