2021年4月25日星期日

Why do i have "OutOfMemoryError" in my Kmeans CuPy code?

I'm really new to GPU coding. I found this K-means CuPy code, and my purpose is to work with a large data set of shape (n, 3), for example, to see the timing difference between the GPU and the CPU. I want to use a huge number of clusters, but I am getting a memory-management error. Can someone give me the route I should take to research and fix it? I have already done some research, but I don't have a clear starting point yet.

import contextlib
import time

import cupy
import matplotlib.pyplot as plt
import numpy


@contextlib.contextmanager
def timer(message):
    """Print the wall-clock time of the enclosed block.

    Synchronizes the null CUDA stream before and after timing so that
    asynchronous GPU work is fully accounted for.

    Args:
        message: Label printed together with the elapsed seconds.
    """
    cupy.cuda.Stream.null.synchronize()
    start = time.time()
    yield
    cupy.cuda.Stream.null.synchronize()
    end = time.time()
    print('%s:  %f sec' % (message, end - start))


# Squared Euclidean distance between a sample and a centroid over all three
# feature columns.  The original kernel used only two coordinates (ignoring
# the third column of the (n, 3) data) and, worse, received the centroid's
# two coordinates swapped (c0 <- centers[:, 1], c1 <- centers[:, 0]), so the
# computed distance was wrong.  Both defects are fixed here.
var_kernel = cupy.ElementwiseKernel(
    'T x0, T x1, T x2, T c0, T c1, T c2', 'T out',
    'out = (x0 - c0) * (x0 - c0) + (x1 - c1) * (x1 - c1)'
    ' + (x2 - c2) * (x2 - c2)',
    'var_kernel'
)
# Masked sum: adds x where mask is true, contributing 0 elsewhere.
sum_kernel = cupy.ReductionKernel(
    'T x, S mask', 'T out',
    'mask ? x : 0',
    'a + b', 'out = a', '0',
    'sum_kernel'
)
# Counts true entries of a boolean mask as float32.
count_kernel = cupy.ReductionKernel(
    'T mask', 'float32 out',
    'mask ? 1.0 : 0.0',
    'a + b', 'out = a', '0.0',
    'count_kernel'
)


def fit_xp(X, n_clusters, max_iter):
    """Fit K-means using only NumPy/CuPy array operations.

    Args:
        X: 2-D array of samples, shape ``(n_samples, n_features)``.
        n_clusters: Number of clusters.
        max_iter: Maximum number of Lloyd iterations.

    Returns:
        Tuple ``(centers, pred)`` with the fitted centroids and the cluster
        label of each sample.
    """
    assert X.ndim == 2

    # Get NumPy or CuPy module from the supplied array.
    xp = cupy.get_array_module(X)

    n_samples = len(X)

    # Labels are integers; the original float-typed init forced a mixed-type
    # comparison against argmin's integer output every iteration.
    pred = xp.zeros(n_samples, dtype=xp.int64)

    # Choose the initial centroid for each cluster.
    initial_indexes = xp.random.choice(n_samples, n_clusters, replace=False)
    centers = X[initial_indexes]

    for _ in range(max_iter):
        # argmin of ||x - c||^2 equals argmin of ||c||^2 - 2 x.c, because the
        # ||x||^2 term is constant per sample.  This allocates only an
        # (n_samples, n_clusters) matrix instead of the
        # (n_samples, n_clusters, n_features) broadcast temporary of
        # X[:, None, :] - centers[None, :, :] -- that temporary is the usual
        # cause of OutOfMemoryError when n_samples * n_clusters is large.
        distances = (centers * centers).sum(axis=1)[None, :] \
            - 2.0 * (X @ centers.T)
        new_pred = xp.argmin(distances, axis=1)

        # If no label changed, the algorithm has converged.
        if xp.all(new_pred == pred):
            break
        pred = new_pred

        # Per-cluster sums via a (n_clusters, n_samples) x (n_samples, n_features)
        # matmul, again avoiding a 3-D masked temporary.
        mask = pred == xp.arange(n_clusters)[:, None]
        weights = mask.astype(X.dtype)
        sums = weights @ X
        counts = weights.sum(axis=1).reshape((n_clusters, 1))
        # Guard against empty clusters to avoid division by zero (an empty
        # cluster simply keeps a zero row until it is repopulated).
        centers = sums / xp.maximum(counts, 1)

    return centers, pred


def fit_custom(X, n_clusters, max_iter):
    """Fit K-means using the hand-written CuPy kernels.

    Same contract as :func:`fit_xp`, but CuPy-only and expects
    ``n_features == 3`` (the kernel is specialized for three coordinates).
    """
    assert X.ndim == 2

    n_samples = len(X)

    pred = cupy.zeros(n_samples, dtype=cupy.int64)

    initial_indexes = cupy.random.choice(n_samples, n_clusters, replace=False)
    centers = X[initial_indexes]

    for _ in range(max_iter):
        # All three coordinates, in matching order (the original swapped the
        # centroid's first two coordinates and dropped the third entirely).
        distances = var_kernel(
            X[:, None, 0], X[:, None, 1], X[:, None, 2],
            centers[None, :, 0], centers[None, :, 1], centers[None, :, 2])
        new_pred = cupy.argmin(distances, axis=1)
        if cupy.all(new_pred == pred):
            break
        pred = new_pred

        i = cupy.arange(n_clusters)
        mask = pred == i[:, None]
        sums = sum_kernel(X, mask[:, :, None], axis=1)
        counts = count_kernel(mask, axis=1).reshape((n_clusters, 1))
        # Same empty-cluster guard as fit_xp.
        centers = sums / cupy.maximum(counts, 1)

    return centers, pred


def draw(X, n_clusters, centers, pred, output):
    """Plot the samples and the fitted centroids into an image file.

    Args:
        X: Samples (NumPy or CuPy array).
        n_clusters: Number of clusters.
        centers: Fitted centroids.
        pred: Cluster label per sample.
        output: Path of the image file to write.
    """
    # Matplotlib cannot consume CuPy arrays directly; move data to the host.
    # cupy.asnumpy is a no-op pass-through for NumPy arrays.
    X = cupy.asnumpy(X)
    centers = cupy.asnumpy(centers)
    pred = cupy.asnumpy(pred)
    for i in range(n_clusters):
        labels = X[pred == i]
        # A single RGB color must be a (1, 3) row, otherwise matplotlib warns
        # that `c` looks like a numeric sequence rather than a color.
        plt.scatter(labels[:, 0], labels[:, 1], c=numpy.random.rand(1, 3))
    plt.scatter(
        centers[:, 0], centers[:, 1], s=120, marker='s', facecolors='y',
        edgecolors='k')
    plt.savefig(output)


def run_cpu(gpuid, n_clusters, num, max_iter, use_custom_kernel):
    """Generate 2*num synthetic samples and time a CPU K-means fit."""
    # float32 halves the working-set size compared with the float64 default
    # of randn -- directly relevant to the reported OutOfMemoryError.
    samples = numpy.random.randn(num, 3).astype(numpy.float32)
    X_train = numpy.r_[samples + 1, samples - 1]

    with timer(' CPU '):
        centers, pred = fit_xp(X_train, n_clusters, max_iter)


def run_gpu(gpuid, n_clusters, num, max_iter, use_custom_kernel):
    """Generate 2*num synthetic samples and time a GPU K-means fit.

    Args:
        gpuid: CUDA device id to run on.
        n_clusters: Number of clusters.
        num: Half the number of samples (the data set is mirrored +1/-1).
        max_iter: Maximum number of iterations.
        use_custom_kernel: True to use the hand-written kernels
            (:func:`fit_custom`), False for the generic :func:`fit_xp`.
    """
    samples = numpy.random.randn(num, 3).astype(numpy.float32)
    X_train = numpy.r_[samples + 1, samples - 1]

    with cupy.cuda.Device(gpuid):
        X_train = cupy.asarray(X_train)

        with timer(' GPU '):
            if use_custom_kernel:
                centers, pred = fit_custom(X_train, n_clusters, max_iter)
            else:
                centers, pred = fit_xp(X_train, n_clusters, max_iter)

By the way, I am working in Colab Pro with 25 GB of RAM. The code works with n_clusters=200 and num=1000000, but if I use bigger numbers the error appears. I am running the code like this:

run_gpu(0,200,1000000,10,True)  

This is the error that I get:

Any suggestions are welcome — thanks for your time.

https://stackoverflow.com/questions/67231114/why-do-i-have-outofmemoryerror-in-my-kmeans-cupy-code April 23, 2021 at 09:47PM

没有评论:

发表评论