package org.apache.sysml.runtime.instructions.gpu.context;

import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import jcuda.Pointer;
import jcuda.jcusparse.cusparseHandle;
import jcuda.jcusparse.cusparseMatDescr;
import jcuda.runtime.JCuda;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.instructions.cp.CPInstruction;
import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlockCOO;
import org.apache.sysml.runtime.matrix.data.SparseBlockCSR;
import org.apache.sysml.runtime.matrix.data.SparseBlockMCSR;
import org.apache.sysml.utils.GPUStatistics;

/* loaded from: input_file:org/apache/sysml/runtime/instructions/gpu/context/GPUObject.class */
public class GPUObject {
    // Logger keyed on this class's fully-qualified name.
    private static final Log LOG = LogFactory.getLog(GPUObject.class.getName());
    // GPU context this object belongs to; assigned once in the constructor.
    private final GPUContext gpuContext;
    // Matrix object whose dimensions and host-side block this object works with.
    MatrixObject mat;
    // Pointer to the dense device data, or null when none is set.
    Pointer jcudaDenseMatrixPtr = null;
    // CSR device data, or null when none is set.
    private CSRPointer jcudaSparseMatrixPtr = null;
    // Set when the object is acquired for modification or released as an output;
    // cleared at the end of a full copyFromDeviceToHost.
    protected boolean dirty = false;
    // Number of outstanding read locks.
    protected LongAdder readLocks = new LongAdder();
    // True while the write lock is held.
    protected boolean writeLock = false;
    // Bookkeeping value refreshed on release according to DMLScript.GPU_EVICTION_POLICY.
    AtomicLong timestamp = new AtomicLong();
    // Set to true/false when a sparse/dense pointer is installed (also set by acquireDeviceModifySparse).
    protected boolean isSparse = false;
    // Shadow buffer bound to this object; consulted before the dense pointer is handed out.
    final ShadowBuffer shadowBuffer = new ShadowBuffer(this);

    /**
     * Returns the dense pointer. When no dense pointer is set, the shadow buffer
     * reports buffered data and no sparse pointer exists, the shadow buffer is
     * first asked to move its contents to the device.
     *
     * @return the dense pointer, possibly {@code null}
     */
    public Pointer getDensePointer() {
        if (jcudaDenseMatrixPtr == null) {
            boolean restoreFromShadow = shadowBuffer.isBuffered() && getJcudaSparseMatrixPtr() == null;
            if (restoreFromShadow) {
                shadowBuffer.moveToDevice();
            }
        }
        return jcudaDenseMatrixPtr;
    }

    /** @return {@code true} when no dense pointer is currently set */
    public boolean isDensePointerNull() {
        return null == jcudaDenseMatrixPtr;
    }

    /**
     * Forgets the dense pointer reference and clears the shadow pointer.
     * No free call is issued here; callers free the memory themselves beforehand.
     */
    public void clearDensePointer() {
        jcudaDenseMatrixPtr = null;
        shadowBuffer.clearShadowPointer();
    }

    /**
     * Installs a dense pointer and marks this object as dense. Any existing
     * sparse pointer is deallocated and dropped afterwards.
     *
     * @param pointer dense pointer to install
     * @throws DMLRuntimeException if a dense pointer is already set
     */
    public void setDensePointer(Pointer pointer) {
        boolean alreadySet = !isDensePointerNull();
        if (alreadySet) {
            throw new DMLRuntimeException("jcudaDenseMatrixPtr was already allocated for " + this + ", this will cause a memory leak on the GPU");
        }

        jcudaDenseMatrixPtr = pointer;
        isSparse = false;

        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting dense pointer of size " + getGPUContext().getMemoryManager().getSizeAllocatedGPUPointer(pointer));
        }

        // Only one representation is kept: release the sparse one if present.
        if (getJcudaSparseMatrixPtr() == null) {
            return;
        }
        getJcudaSparseMatrixPtr().deallocate();
        jcudaSparseMatrixPtr = null;
    }

    /**
     * Creates a copy of this object for the same context and matrix object.
     * Locks are reset on the copy; dense data is duplicated with a
     * {@code cudaMemcpy} and sparse data via {@code CSRPointer.clone}.
     *
     * @return the new {@code GPUObject}
     */
    @Override
    public Object clone() {
        GPUObject copy = new GPUObject(gpuContext, mat);
        copy.dirty = dirty;
        copy.readLocks.reset();
        copy.writeLock = false;
        copy.timestamp = new AtomicLong(timestamp.get());
        copy.isSparse = isSparse;
        try {
            if (!isDensePointerNull()) {
                long sizeInBytes = mat.getNumRows() * mat.getNumColumns() * LibMatrixCUDA.sizeOfDataType;
                copy.setDensePointer(allocate(sizeInBytes));
                // Copy kind 3 is cudaMemcpyDeviceToDevice in JCuda's cudaMemcpyKind.
                JCuda.cudaMemcpy(copy.getDensePointer(), getDensePointer(), sizeInBytes, 3);
            }
            if (getJcudaSparseMatrixPtr() != null) {
                copy.jcudaSparseMatrixPtr = jcudaSparseMatrixPtr.clone((int) mat.getNumRows());
            }
        } catch (DMLRuntimeException e) {
            throw new RuntimeException(e);
        }
        return copy;
    }

    /**
     * Allocates {@code j} units through this object's GPU context, passing
     * {@code null} as the first (label) argument.
     */
    private Pointer allocate(long j) {
        GPUContext context = getGPUContext();
        return context.allocate(null, j);
    }

    /**
     * Frees {@code pointer} through the GPU context, using the global
     * {@code DMLScript.EAGER_CUDA_FREE} setting as the eager flag.
     */
    private void cudaFreeHelper(Pointer pointer) throws DMLRuntimeException {
        GPUContext context = getGPUContext();
        context.cudaFreeHelper(null, pointer, DMLScript.EAGER_CUDA_FREE);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /** @return the GPU context this object was created with */
    public GPUContext getGPUContext() {
        return gpuContext;
    }

    /**
     * Transposes a dense block into a freshly allocated buffer via
     * {@code cublasgeam} with scalars one and zero.
     *
     * @param gPUContext context used for allocation and the cuBLAS handle
     * @param pointer    input dense data
     * @param i          first dimension passed to geam (m)
     * @param i2         second dimension passed to geam (n)
     * @param i3         leading dimension used for both inputs
     * @param i4         leading dimension of the output
     * @return pointer to the newly allocated result; the caller owns it
     */
    public static Pointer transpose(GPUContext gPUContext, Pointer pointer, int i, int i2, int i3, int i4) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : transpose of block of size [" + i + "," + i2 + "], GPUContext=" + gPUContext);
        }
        Pointer alpha = LibMatrixCUDA.one();
        Pointer beta = LibMatrixCUDA.zero();
        long resultBytes = i * getDatatypeSizeOf(i2);
        Pointer transposed = gPUContext.allocate(null, resultBytes);
        // Both operation flags are 1 (presumably CUBLAS_OP_T); the second input is an
        // empty Pointer because it is scaled by zero.
        LibMatrixCUDA.cudaSupportFunctions.cublasgeam(gPUContext.getCublasHandle(), 1, 1, i, i2, alpha, pointer, i3, beta, new Pointer(), i3, transposed, i4);
        return transposed;
    }

    /**
     * Converts a dense block into a newly allocated CSR structure using
     * {@code cusparsennz} followed by {@code cusparsedense2csr}.
     *
     * <p>The two temporary device buffers (per-row nnz counts and the total nnz)
     * are released in a {@code finally} block so they no longer leak when the
     * nnz check or one of the cuSPARSE calls throws.
     *
     * @param gPUContext     context used for allocation and freeing
     * @param cusparsehandle cuSPARSE handle
     * @param pointer        dense input data
     * @param i              number of rows (also used as the leading dimension)
     * @param i2             number of columns
     * @return the populated CSR pointer; the caller owns it
     * @throws DMLRuntimeException if the total nnz was not written back
     */
    public static CSRPointer columnMajorDenseToRowMajorSparse(GPUContext gPUContext, cusparseHandle cusparsehandle, Pointer pointer, int i, int i2) {
        cusparseMatDescr matDescr = CSRPointer.getDefaultCuSparseMatrixDescriptor();
        Pointer nnzPerRowPtr = null;
        Pointer nnzTotalPtr = null;
        try {
            nnzPerRowPtr = gPUContext.allocate(null, getIntSizeOf(i));
            nnzTotalPtr = gPUContext.allocate(null, getIntSizeOf(1L));
            LibMatrixCUDA.cudaSupportFunctions.cusparsennz(cusparsehandle, 0, i, i2, matDescr, pointer, i, nnzPerRowPtr, nnzTotalPtr);

            // -1 is a sentinel: it survives only if the memcpy did not overwrite it.
            int[] nnzHost = {-1};
            // Copy kind 2 is cudaMemcpyDeviceToHost in JCuda's cudaMemcpyKind.
            JCuda.cudaMemcpy(Pointer.to(nnzHost), nnzTotalPtr, getIntSizeOf(1L), 2);
            if (nnzHost[0] == -1) {
                throw new DMLRuntimeException("cusparseDnnz did not calculate the correct number of nnz from the sparse-matrix vector mulitply on the GPU");
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : col-major dense size[" + i + "," + i2 + "] to row-major sparse of with nnz = " + nnzHost[0] + ", GPUContext=" + gPUContext);
            }

            CSRPointer csr = CSRPointer.allocateEmpty(gPUContext, nnzHost[0], i);
            LibMatrixCUDA.cudaSupportFunctions.cusparsedense2csr(cusparsehandle, i, i2, matDescr, pointer, i, nnzPerRowPtr, csr.val, csr.rowPtr, csr.colInd);
            return csr;
        } finally {
            // Free in the same order as before, but on every exit path.
            if (nnzPerRowPtr != null) {
                gPUContext.cudaFreeHelper(null, nnzPerRowPtr, DMLScript.EAGER_CUDA_FREE);
            }
            if (nnzTotalPtr != null) {
                gPUContext.cudaFreeHelper(null, nnzTotalPtr, DMLScript.EAGER_CUDA_FREE);
            }
        }
    }

    /** @return the sparse (CSR) pointer, or {@code null} if none is set; same as {@link #getJcudaSparseMatrixPtr()} */
    public CSRPointer getSparseMatrixCudaPointer() {
        CSRPointer sparse = getJcudaSparseMatrixPtr();
        return sparse;
    }

    /**
     * Installs a sparse (CSR) pointer and marks this object as sparse. A dense
     * pointer that is set and not held by the shadow buffer is freed and cleared.
     *
     * @param cSRPointer sparse pointer to install
     * @throws DMLRuntimeException if a sparse pointer is already set
     */
    public void setSparseMatrixCudaPointer(CSRPointer cSRPointer) {
        if (jcudaSparseMatrixPtr != null) {
            throw new DMLRuntimeException("jcudaSparseMatrixPtr was already allocated for " + this + ", this will cause a memory leak on the GPU");
        }
        jcudaSparseMatrixPtr = cSRPointer;
        isSparse = true;

        boolean releaseDense = !isDensePointerNull() && !shadowBuffer.isBuffered();
        if (releaseDense) {
            cudaFreeHelper(getDensePointer());
            clearDensePointer();
        }
    }

    /**
     * Converts the dense data of this object to CSR: the dense block is first
     * switched to column-major, then passed to
     * {@link #columnMajorDenseToRowMajorSparse}, and the result is installed as
     * the sparse pointer.
     *
     * @throws DMLRuntimeException if cuSPARSE is not initialised or no dense data is available
     */
    public void denseToSparse() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : dense -> sparse on " + this + ", GPUContext=" + getGPUContext());
        }
        final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

        cusparseHandle handle = getGPUContext().getCusparseHandle();
        if (handle == null) {
            throw new DMLRuntimeException("Expected cusparse to be initialized");
        }
        int rows = toIntExact(mat.getNumRows());
        int cols = toIntExact(mat.getNumColumns());

        boolean noDenseData = isDensePointerNull() && !shadowBuffer.isBuffered();
        if (noDenseData || !isAllocated()) {
            throw new DMLRuntimeException("Expected allocated dense matrix before denseToSparse() call");
        }

        denseRowMajorToColumnMajor();
        CSRPointer csr = columnMajorDenseToRowMajorSparse(getGPUContext(), handle, getDensePointer(), rows, cols);
        setSparseMatrixCudaPointer(csr);

        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaDenseToSparseTime.add(System.nanoTime() - startNanos);
        }
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaDenseToSparseCount.add(1L);
        }
    }

    /**
     * Replaces the dense data with its transpose-based column-major layout:
     * a transposed copy is produced, the old buffer is freed, and the new
     * pointer is installed.
     *
     * @throws DMLRuntimeException if nothing is allocated
     */
    public void denseRowMajorToColumnMajor() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : dense Ptr row-major -> col-major on " + this + ", GPUContext=" + getGPUContext());
        }
        final int rows = toIntExact(mat.getNumRows());
        final int cols = toIntExact(mat.getNumColumns());
        if (!isAllocated()) {
            throw new DMLRuntimeException("Error in converting row major to column major : data is not allocated");
        }

        Pointer columnMajor = transpose(getGPUContext(), getDensePointer(), rows, cols, cols, rows);
        // The old buffer must be released and cleared before setDensePointer accepts a new one.
        cudaFreeHelper(getDensePointer());
        clearDensePointer();
        setDensePointer(columnMajor);
    }

    /**
     * Replaces the column-major dense data with a row-major copy: a transposed
     * copy is produced, the old buffer is freed, and the new pointer is installed.
     *
     * <p>The trace message previously claimed the opposite direction
     * ("row-major -> col-major"); it now matches what this method does.
     *
     * @throws DMLRuntimeException if nothing is allocated
     */
    public void denseColumnMajorToRowMajor() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : dense Ptr col-major -> row-major on " + this + ", GPUContext=" + getGPUContext());
        }
        int rows = toIntExact(this.mat.getNumRows());
        int cols = toIntExact(this.mat.getNumColumns());
        if (!isAllocated()) {
            throw new DMLRuntimeException("Error in converting column major to row major : data is not allocated");
        }
        // Dimensions and leading dimensions are swapped relative to denseRowMajorToColumnMajor.
        Pointer rowMajor = transpose(getGPUContext(), getDensePointer(), cols, rows, rows, cols);
        // The old buffer must be released and cleared before setDensePointer accepts a new one.
        cudaFreeHelper(getDensePointer());
        clearDensePointer();
        setDensePointer(rowMajor);
    }

    /** Converts sparse data to dense without a statistics label (delegates with {@code null}). */
    public void sparseToDense() {
        final String noLabel = null;
        sparseToDense(noLabel);
    }

    /**
     * Converts the sparse data to dense row-major data by going through a
     * column-major dense intermediate, and records timing statistics.
     *
     * @param str label for fine-grained statistics (presumably the instruction name); may be {@code null}
     * @throws DMLRuntimeException if no sparse data is allocated
     */
    public void sparseToDense(String str) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : sparse -> dense on " + this + ", GPUContext=" + getGPUContext());
        }
        final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

        if (getJcudaSparseMatrixPtr() == null || !isAllocated()) {
            throw new DMLRuntimeException("Expected allocated sparse matrix before sparseToDense() call");
        }
        sparseToColumnMajorDense();
        denseColumnMajorToRowMajor();

        // The end time is only sampled when STATISTICS is on; otherwise the reported delta is 0.
        final long endNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
        final long elapsed = endNanos - startNanos;

        if (str != null && DMLScript.FINEGRAINED_STATISTICS) {
            GPUStatistics.maintainCPMiscTimes(str, "s2d", elapsed);
        }
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaSparseToDenseTime.add(elapsed);
        }
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaSparseToDenseCount.add(1L);
        }
    }

    /**
     * Builds a column-major dense copy of the sparse data and installs it as the
     * dense pointer (which in turn deallocates the sparse pointer).
     *
     * @throws DMLRuntimeException if no sparse data is allocated or cuSPARSE is not initialised
     */
    public void sparseToColumnMajorDense() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : sparse -> col-major dense on " + this + ", GPUContext=" + getGPUContext());
        }
        boolean hasSparse = getJcudaSparseMatrixPtr() != null && isAllocated();
        if (!hasSparse) {
            throw new DMLRuntimeException("Expected allocated sparse matrix before sparseToDense() call");
        }
        cusparseHandle handle = getGPUContext().getCusparseHandle();
        if (handle == null) {
            throw new DMLRuntimeException("Expected cusparse to be initialized");
        }
        Pointer columnMajorDense = getJcudaSparseMatrixPtr().toColumnMajorDenseMatrix(handle, null, toIntExact(mat.getNumRows()), toIntExact(mat.getNumColumns()), null);
        setDensePointer(columnMajorDense);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a GPU object bound to the given context and matrix object.
     * No device memory is allocated here.
     *
     * @param gPUContext   owning GPU context
     * @param matrixObject matrix object this GPU object is attached to
     */
    public GPUObject(GPUContext gPUContext, MatrixObject matrixObject) {
        gpuContext = gPUContext;
        mat = matrixObject;
    }

    /** @return the current value of the sparse flag */
    public boolean isSparse() {
        return isSparse;
    }

    /** @return {@code j} multiplied by {@code LibMatrixCUDA.sizeOfDataType} */
    private static long getDatatypeSizeOf(long j) {
        long bytes = j * LibMatrixCUDA.sizeOfDataType;
        return bytes;
    }

    /** @return the number of bytes occupied by {@code j} 32-bit integers */
    private static long getIntSizeOf(long j) {
        return j * Integer.BYTES;
    }

    /**
     * @return {@code true} if the shadow buffer holds data, or a dense pointer
     *         or a sparse pointer is set
     */
    public boolean isAllocated() {
        return shadowBuffer.isBuffered() || !isDensePointerNull() || getJcudaSparseMatrixPtr() != null;
    }

    /** Installs an empty CSR structure (nnz = 0) sized for this matrix's row count. */
    public void allocateSparseAndEmpty() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocate sparse and empty block on " + this + ", GPUContext=" + getGPUContext());
        }
        CSRPointer emptyCsr = CSRPointer.allocateEmpty(getGPUContext(), 0L, mat.getNumRows());
        setSparseMatrixCudaPointer(emptyCsr);
    }

    /**
     * Allocates a dense block of rows*cols cells and, for a non-zero value,
     * launches the fill kernel over it.
     *
     * @param d value to fill with; for {@code 0.0} no kernel is launched
     *          (NOTE(review): relies on the allocator returning zeroed memory — confirm)
     */
    public void allocateAndFillDense(double d) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocate and fill dense with value " + d + " on " + this + ", GPUContext=" + getGPUContext());
        }
        final int cellCount = toIntExact(mat.getNumRows() * mat.getNumColumns());
        Pointer dense = allocate(getDatatypeSizeOf(cellCount));
        setDensePointer(dense);
        if (d == 0.0d) {
            return;
        }
        getGPUContext().getKernels().launchKernel(DataExpression.DELIM_FILL, ExecutionConfig.getConfigForSimpleVectorOperations(cellCount), getDensePointer(), Double.valueOf(d), Integer.valueOf(cellCount));
    }

    /**
     * @return {@code true} if this object is allocated, reported as sparse by
     *         {@code LibMatrixCUDA.isInSparseFormat}, and its CSR nnz is zero
     */
    public boolean isSparseAndEmpty() {
        if (!isAllocated()) {
            return false;
        }
        if (!LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) {
            return false;
        }
        return getJcudaSparseMatrixPtr().nnz == 0;
    }

    /**
     * Returns the number of non-zeros of this object.
     *
     * <p>For sparse data the CSR nnz is returned directly. For dense data the
     * count is computed with {@code cusparsennz} only when {@code z} is set.
     * The temporary device buffers used for that are released in a
     * {@code finally} block so they no longer leak when the computation throws.
     *
     * @param str label forwarded to allocation/free and statistics calls
     *            (presumably the instruction name)
     * @param z   whether to compute the count for dense data
     * @return the nnz, or {@code -1} for dense data when {@code z} is {@code false}
     * @throws DMLRuntimeException if the object is not allocated, cuSPARSE is not
     *                             initialised, or the count was not written back
     */
    public long getNnz(String str, boolean z) {
        if (!isAllocated()) {
            throw new DMLRuntimeException("Expected the GPU object to be allocated");
        }
        if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), this.mat)) {
            return getJcudaSparseMatrixPtr().nnz;
        }
        if (!z) {
            return -1L;
        }
        long startNanos = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
        GPUContext gPUContext = getGPUContext();
        cusparseHandle handle = gPUContext.getCusparseHandle();
        cusparseMatDescr matDescr = CSRPointer.getDefaultCuSparseMatrixDescriptor();
        if (handle == null) {
            throw new DMLRuntimeException("Expected cusparse to be initialized");
        }
        int rows = toIntExact(this.mat.getNumRows());
        int cols = toIntExact(this.mat.getNumColumns());

        // -1 is a sentinel: it survives only if the memcpy did not overwrite it.
        int[] nnzHost = {-1};
        Pointer nnzPerRowPtr = null;
        Pointer nnzTotalPtr = null;
        try {
            nnzPerRowPtr = gPUContext.allocate(str, getIntSizeOf(rows));
            nnzTotalPtr = gPUContext.allocate(str, getIntSizeOf(1L));
            LibMatrixCUDA.cudaSupportFunctions.cusparsennz(handle, 0, rows, cols, matDescr, getDensePointer(), rows, nnzPerRowPtr, nnzTotalPtr);
            // Copy kind 2 is cudaMemcpyDeviceToHost in JCuda's cudaMemcpyKind.
            JCuda.cudaMemcpy(Pointer.to(nnzHost), nnzTotalPtr, getIntSizeOf(1L), 2);
            if (nnzHost[0] == -1) {
                throw new DMLRuntimeException("cusparseDnnz did not calculate the correct number of nnz on the GPU");
            }
        } finally {
            // Free in the same order as before, but on every exit path.
            if (nnzPerRowPtr != null) {
                gPUContext.cudaFreeHelper(str, nnzPerRowPtr, DMLScript.EAGER_CUDA_FREE);
            }
            if (nnzTotalPtr != null) {
                gPUContext.cudaFreeHelper(str, nnzTotalPtr, DMLScript.EAGER_CUDA_FREE);
            }
        }
        if (DMLScript.FINEGRAINED_STATISTICS) {
            GPUStatistics.maintainCPMiscTimes(str, CPInstruction.MISC_TIMER_RECOMPUTE_NNZ, System.nanoTime() - startNanos);
        }
        return nnzHost[0];
    }

    /**
     * Prepares this object for reading on the device: copies the data from the
     * host if nothing is allocated yet, then takes a read lock.
     *
     * @param str label forwarded to the host-to-device copy (presumably the instruction name)
     * @return {@code true} if a host-to-device copy was performed
     * @throws DMLRuntimeException if the data is still not allocated afterwards
     */
    public boolean acquireDeviceRead(String str) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceRead on " + this);
        }
        final boolean copiedFromHost = !isAllocated();
        if (copiedFromHost) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : in acquireDeviceRead, data is not allocated, copying from host, on " + this + ", GPUContext=" + getGPUContext());
            }
            copyFromHostToDevice(str);
        }
        addReadLock();
        if (!isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return copiedFromHost;
    }

    /**
     * Prepares this object for dense modification on the device, allocating a
     * dense block if nothing is allocated yet, and marks it dirty.
     *
     * @return {@code true} if a new dense block was allocated
     * @throws DMLRuntimeException if the data is still not allocated afterwards
     */
    public boolean acquireDeviceModifyDense() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceModifyDense on " + this + ", GPUContext=" + getGPUContext());
        }
        final boolean newlyAllocated = !isAllocated();
        if (newlyAllocated) {
            mat.setDirty(true);
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : data is not allocated, allocating a dense block, on " + this);
            }
            allocateDenseMatrixOnDevice();
        }
        dirty = true;
        if (!isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return newlyAllocated;
    }

    /**
     * Prepares this object for sparse modification on the device, allocating a
     * sparse block if nothing is allocated yet, and marks it dirty. The sparse
     * flag is set unconditionally.
     *
     * @return {@code true} if a new sparse block was allocated
     * @throws DMLRuntimeException if the data is still not allocated afterwards
     */
    public boolean acquireDeviceModifySparse() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceModifySparse on " + this + ", GPUContext=" + getGPUContext());
        }
        isSparse = true;
        final boolean newlyAllocated = !isAllocated();
        if (newlyAllocated) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : data is not allocated, allocating a sparse block, on " + this);
            }
            mat.setDirty(true);
            allocateSparseMatrixOnDevice();
        }
        dirty = true;
        if (!isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return newlyAllocated;
    }

    /**
     * Makes the host copy current: when device data is allocated and dirty it
     * is copied back to the host (non-eviction copy, third flag {@code true}).
     *
     * <p>Fixes relative to the previous version: the entry trace message named
     * the wrong method, and a duplicated nested condition caused the
     * "data is dirty" trace line to be logged twice.
     *
     * @param str label forwarded to the device-to-host copy (presumably the instruction name)
     * @return {@code true} if a device-to-host copy was performed
     */
    public boolean acquireHostRead(String str) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireHostRead on " + this + ", GPUContext=" + getGPUContext());
        }
        if (!isAllocated() || !this.dirty) {
            // Nothing newer on the device than on the host.
            return false;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : data is dirty on device, copying to host, on " + this + ", GPUContext=" + getGPUContext());
        }
        copyFromDeviceToHost(str, false, true);
        return true;
    }

    /** @return {@code true} if the write lock is held or at least one read lock is outstanding */
    public boolean isLocked() {
        if (writeLock) {
            return true;
        }
        return readLocks.longValue() > 0;
    }

    /**
     * Registers one more reader.
     *
     * @throws DMLRuntimeException if the write lock is currently held
     */
    public void addReadLock() {
        if (!writeLock) {
            readLocks.increment();
            return;
        }
        throw new DMLRuntimeException("Attempting to add a read lock when writeLock=" + writeLock);
    }

    /**
     * Takes the write lock.
     *
     * @throws DMLRuntimeException if any read lock is outstanding or the write lock is already held
     */
    public void addWriteLock() {
        boolean hasReaders = readLocks.longValue() > 0;
        if (hasReaders) {
            throw new DMLRuntimeException("Attempting to add a write lock when readLocks=" + readLocks.longValue());
        }
        if (writeLock) {
            throw new DMLRuntimeException("Attempting to add a write lock when writeLock=" + writeLock);
        }
        writeLock = true;
    }

    /**
     * Releases one read lock. The counter is decremented first and validated afterwards.
     *
     * @throws DMLRuntimeException if the counter became negative
     */
    public void releaseReadLock() {
        readLocks.decrement();
        boolean underflow = readLocks.longValue() < 0;
        if (underflow) {
            throw new DMLRuntimeException("Attempting to release a read lock when readLocks=" + readLocks.longValue());
        }
    }

    /**
     * Releases the write lock.
     *
     * @throws DMLRuntimeException if the write lock is not held
     */
    public void releaseWriteLock() {
        if (writeLock) {
            writeLock = false;
            return;
        }
        throw new DMLRuntimeException("Internal state error : Attempting to release write lock on a GPUObject, which was already released");
    }

    /** Drops all locks: resets the read-lock counter and clears the write lock. */
    public void resetReadWriteLock() {
        readLocks.reset();
        writeLock = false;
    }

    /**
     * Refreshes {@code timestamp} according to {@code DMLScript.GPU_EVICTION_POLICY}:
     * LRU stores the current nano time, MRU its negation, LFU increments by one,
     * MIN_EVICT and ALIGN_MEMORY leave it untouched.
     *
     * @throws DMLRuntimeException for any other policy
     */
    private void updateReleaseLocks() {
        final DMLScript.EvictionPolicy policy = DMLScript.GPU_EVICTION_POLICY;
        switch (policy) {
            case LRU:
                timestamp.set(System.nanoTime());
                break;
            case LFU:
                timestamp.incrementAndGet();
                break;
            case MRU:
                timestamp.set(-System.nanoTime());
                break;
            case MIN_EVICT:
            case ALIGN_MEMORY:
                // These policies do not use the timestamp.
                break;
            default:
                throw new DMLRuntimeException("The eviction policy is not supported:" + policy.name());
        }
    }

    /**
     * Releases this object after use as an input: drops one read lock and
     * refreshes the eviction timestamp.
     *
     * @throws DMLRuntimeException if the object is not allocated (checked after the lock is released)
     */
    public void releaseInput() {
        releaseReadLock();
        updateReleaseLocks();
        if (isAllocated()) {
            return;
        }
        throw new DMLRuntimeException("Attempting to release an input before allocating it");
    }

    /**
     * Releases this object after use as an output: drops the write lock,
     * refreshes the eviction timestamp and marks the data dirty.
     *
     * @throws DMLRuntimeException if the object is not allocated (checked last)
     */
    public void releaseOutput() {
        releaseWriteLock();
        updateReleaseLocks();
        dirty = true;
        if (isAllocated()) {
            return;
        }
        throw new DMLRuntimeException("Attempting to release an output before allocating it");
    }

    /**
     * Allocates a dense block of rows*cols cells and installs it as the dense pointer.
     *
     * @throws DMLRuntimeException if already allocated or a dimension is not positive
     */
    void allocateDenseMatrixOnDevice() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocateDenseMatrixOnDevice, on " + this + ", GPUContext=" + getGPUContext());
        }
        if (isAllocated()) {
            throw new DMLRuntimeException("Internal error - trying to allocated dense matrix to a GPUObject that is already allocated");
        }
        final long rows = mat.getNumRows();
        final long cols = mat.getNumColumns();
        if (rows <= 0) {
            throw new DMLRuntimeException("Internal error - invalid number of rows when allocating dense matrix:" + rows);
        }
        if (cols <= 0) {
            throw new DMLRuntimeException("Internal error - invalid number of columns when allocating dense matrix:" + cols);
        }
        final long sizeInBytes = getDatatypeSizeOf(rows * cols);
        setDensePointer(allocate(sizeInBytes));
    }

    /**
     * Allocates an empty CSR structure sized by the matrix object's nnz and row
     * count, and installs it as the sparse pointer.
     *
     * @throws DMLRuntimeException if already allocated, rows is not positive, or nnz is negative
     */
    void allocateSparseMatrixOnDevice() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocateSparseMatrixOnDevice, on " + this + ", GPUContext=" + getGPUContext());
        }
        if (isAllocated()) {
            throw new DMLRuntimeException("Internal error - trying to allocated sparse matrix to a GPUObject that is already allocated");
        }
        final long rows = mat.getNumRows();
        final long nonZeros = mat.getNnz();
        if (rows <= 0) {
            throw new DMLRuntimeException("Internal error - invalid number of rows when allocating sparse matrix");
        }
        if (nonZeros < 0) {
            throw new DMLRuntimeException("Internal error - invalid number of non zeroes when allocating a sparse matrix");
        }
        CSRPointer csr = CSRPointer.allocateEmpty(getGPUContext(), nonZeros, rows);
        setSparseMatrixCudaPointer(csr);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the size for this matrix: {@code CSRPointer.estimateSize(nnz, rows)}
     *         when reported as sparse, otherwise rows*cols times the data type size
     */
    public long getSizeOnDevice() {
        final long rows = mat.getNumRows();
        if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) {
            return CSRPointer.estimateSize(mat.getNnz(), rows);
        }
        return getDatatypeSizeOf(rows * mat.getNumColumns());
    }

    /**
     * Copies the host block obtained from {@code mat.acquireRead()} to the device.
     * A block in sparse format is uploaded as CSR (COO and MCSR blocks are first
     * converted to CSR on the host); otherwise a dense buffer is allocated and
     * either zeroed (nnz == 0) or filled from the host values.
     *
     * NOTE(review): {@code mat.release()} is only reached on the normal path; any
     * exception thrown in between leaves the read acquisition unreleased.
     *
     * @param str label forwarded to statistics and copy helpers (presumably the instruction name)
     */
    void copyFromHostToDevice(String str) {
        SparseBlockCSR sparseBlockCSR;
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + getGPUContext());
        }
        // j: start of the overall host-to-device timing (only sampled with STATISTICS).
        long j = 0;
        if (DMLScript.STATISTICS) {
            j = System.nanoTime();
        }
        // nanoTime: start of the fine-grained timing of acquiring the host block.
        long nanoTime = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
        MatrixBlock acquireRead = this.mat.acquireRead();
        if (DMLScript.FINEGRAINED_STATISTICS) {
            if (acquireRead.isInSparseFormat()) {
                GPUStatistics.maintainCPMiscTimes(str, CPInstruction.MISC_TIMER_GET_SPARSE_MB, System.nanoTime() - nanoTime);
            } else {
                GPUStatistics.maintainCPMiscTimes(str, CPInstruction.MISC_TIMER_GET_DENSE_MB, System.nanoTime() - nanoTime);
            }
        }
        if (acquireRead.isInSparseFormat()) {
            // CSR arrays to upload: row pointers, column indexes, values.
            int[] iArr = null;
            int[] iArr2 = null;
            double[] dArr = null;
            // A negative nnz is recomputed before it is used for allocation.
            if (acquireRead.getNonZeros() < 0) {
                acquireRead.recomputeNonZeros();
            }
            this.mat.getMatrixCharacteristics().setNonZeros(acquireRead.getNonZeros());
            SparseBlock sparseBlock = acquireRead.getSparseBlock();
            // z: whether there is host data to copy after the device allocation.
            boolean z = true;
            if (sparseBlock == null && acquireRead.getNonZeros() == 0) {
                // Empty sparse block: allocate on the device but skip the copy.
                z = false;
            } else {
                if (sparseBlock == null && acquireRead.getNonZeros() != 0) {
                    throw new DMLRuntimeException("Expected CP sparse block to be not null.");
                }
                // j2: start of the host-side sparse format conversion timing.
                long j2 = 0;
                if (sparseBlock instanceof SparseBlockCSR) {
                    // Already CSR: use as is.
                    sparseBlockCSR = (SparseBlockCSR) sparseBlock;
                } else if (sparseBlock instanceof SparseBlockCOO) {
                    // COO -> CSR conversion on the host.
                    if (DMLScript.STATISTICS) {
                        j2 = System.nanoTime();
                    }
                    SparseBlockCOO sparseBlockCOO = (SparseBlockCOO) sparseBlock;
                    sparseBlockCSR = new SparseBlockCSR(toIntExact(this.mat.getNumRows()), sparseBlockCOO.rowIndexes(), sparseBlockCOO.indexes(), sparseBlockCOO.values());
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - j2);
                    }
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionCount.increment();
                    }
                } else {
                    if (!(sparseBlock instanceof SparseBlockMCSR)) {
                        throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
                    }
                    // MCSR -> CSR conversion on the host.
                    if (DMLScript.STATISTICS) {
                        j2 = System.nanoTime();
                    }
                    SparseBlockMCSR sparseBlockMCSR = (SparseBlockMCSR) sparseBlock;
                    sparseBlockCSR = new SparseBlockCSR(sparseBlockMCSR.getRows(), toIntExact(sparseBlockMCSR.size()));
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - j2);
                    }
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionCount.increment();
                    }
                }
                iArr = sparseBlockCSR.rowPointers();
                iArr2 = sparseBlockCSR.indexes();
                dArr = sparseBlockCSR.values();
            }
            // Allocation uses mat.getNnz(), which was refreshed via setNonZeros above.
            allocateSparseMatrixOnDevice();
            if (z) {
                long nanoTime2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
                CSRPointer.copyToDevice(getGPUContext(), getJcudaSparseMatrixPtr(), acquireRead.getNumRows(), acquireRead.getNonZeros(), iArr, iArr2, dArr);
                if (DMLScript.FINEGRAINED_STATISTICS) {
                    GPUStatistics.maintainCPMiscTimes(str, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, System.nanoTime() - nanoTime2);
                }
            }
        } else {
            double[] denseBlockValues = acquireRead.getDenseBlockValues();
            // Sanity checks: a block reported as dense must not carry only sparse data,
            // and missing dense values are only acceptable for an all-zero block.
            if (denseBlockValues == null && acquireRead.getSparseBlock() != null) {
                throw new DMLRuntimeException("Incorrect sparsity calculation");
            }
            if (denseBlockValues == null && acquireRead.getNonZeros() != 0) {
                throw new DMLRuntimeException("MatrixBlock is not allocated");
            }
            allocateDenseMatrixOnDevice();
            if (acquireRead.getNonZeros() == 0) {
                // All-zero block: zero the device buffer instead of copying host data.
                long nanoTime3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
                JCuda.cudaMemset(getDensePointer(), 0, getDatatypeSizeOf(this.mat.getNumRows() * this.mat.getNumColumns()));
                if (DMLScript.FINEGRAINED_STATISTICS) {
                    GPUStatistics.maintainCPMiscTimes(str, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - nanoTime3);
                }
            } else {
                LibMatrixCUDA.cudaSupportFunctions.hostToDevice(getGPUContext(), denseBlockValues, getDensePointer(), str);
            }
        }
        // Pairs with the acquireRead() above.
        this.mat.release();
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaToDevTime.add(System.nanoTime() - j);
        }
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaToDevCount.add(1L);
        }
    }

    /**
     * Narrows a {@code long} to an {@code int}, rejecting values outside the
     * {@code int} range instead of silently truncating.
     *
     * <p>The upper bound was previously read from
     * {@code OptimizerUtils.MAX_NUMCELLS_CP_DENSE}, an unrelated optimizer
     * limit; it is now expressed with {@code Integer.MAX_VALUE} so the check
     * stays correct regardless of that constant's value.
     *
     * @param j value to convert
     * @return {@code j} as an {@code int}
     * @throws DMLRuntimeException if {@code j} does not fit into an {@code int}
     */
    public static int toIntExact(long j) {
        if (j < Integer.MIN_VALUE || j > Integer.MAX_VALUE) {
            throw new DMLRuntimeException("Cannot be cast to int:" + j);
        }
        return (int) j;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Copies this object's device data back into the host matrix object.
     * Shadow-buffered data and data eligible for shadow buffering are handled by
     * the shadow buffer; otherwise a new host {@code MatrixBlock} is built from
     * the sparse (CSR) or dense device data and installed via
     * {@code mat.acquireModify}.
     *
     * @param str label forwarded to copy helpers and clearData (presumably the instruction name)
     * @param z   forwarded to the shadow buffer and deviceToHost; also suppresses the
     *            shadow move-to-host and the statistics update (presumably "is eviction" — confirm)
     * @param z2  when set, device data is cleared right after it has been copied
     *            (presumably "eager delete" — confirm)
     * @throws DMLRuntimeException on inconsistent pointer state
     */
    public void copyFromDeviceToHost(String str, boolean z, boolean z2) throws DMLRuntimeException {
        MatrixBlock matrixBlock;
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : copyFromDeviceToHost, on " + this + ", GPUContext=" + getGPUContext());
        }
        // Data already lives in the shadow buffer: nothing to do when z is set,
        // otherwise let the shadow buffer move it to the host.
        if (this.shadowBuffer.isBuffered()) {
            if (z) {
                return;
            }
            this.shadowBuffer.moveToHost();
            return;
        }
        // Prefer moving the device data into the shadow buffer when eligible.
        if (this.shadowBuffer.isEligibleForBuffering(z, z2)) {
            this.shadowBuffer.moveFromDevice(str);
            return;
        }
        // Exactly one of the dense / sparse pointers must be set from here on.
        if (isDensePointerNull() && getJcudaSparseMatrixPtr() == null) {
            throw new DMLRuntimeException("Cannot copy from device to host as JCuda dense/sparse pointer is not allocated");
        }
        if (!isDensePointerNull() && getJcudaSparseMatrixPtr() != null) {
            throw new DMLRuntimeException("Invalid state : JCuda dense/sparse pointer are both allocated");
        }
        if (getJcudaSparseMatrixPtr() != null && !LibMatrixCUDA.isInSparseFormat(getGPUContext(), this.mat)) {
            throw new DMLRuntimeException("Block not in sparse format on host yet the device sparse matrix pointer is not null");
        }
        // Empty sparse data: install an empty host block without touching the device.
        // NOTE(review): this path uses raw (int) casts rather than toIntExact and
        // returns without resetting the dirty flag.
        if (getJcudaSparseMatrixPtr() != null && isSparseAndEmpty()) {
            this.mat.acquireModify(new MatrixBlock((int) this.mat.getNumRows(), (int) this.mat.getNumColumns(), 0L));
            this.mat.release();
            return;
        }
        long nanoTime = DMLScript.STATISTICS ? System.nanoTime() : 0L;
        if (isDensePointerNull()) {
            // Sparse path: copy values, then row pointers and column indexes.
            int intExact = toIntExact(this.mat.getNumRows());
            int intExact2 = toIntExact(this.mat.getNumColumns());
            int intExact3 = toIntExact(getJcudaSparseMatrixPtr().nnz);
            double[] dArr = new double[intExact3];
            LibMatrixCUDA.cudaSupportFunctions.deviceToHost(getGPUContext(), getJcudaSparseMatrixPtr().val, dArr, str, z);
            int[] iArr = new int[intExact + 1];
            int[] iArr2 = new int[intExact3];
            CSRPointer.copyPtrToHost(getJcudaSparseMatrixPtr(), intExact, intExact3, iArr, iArr2);
            // Device data is released only after everything has been copied to host arrays.
            if (z2) {
                clearData(str, true);
            }
            matrixBlock = new MatrixBlock(intExact, intExact2, intExact3, new SparseBlockCSR(iArr, iArr2, dArr, intExact3));
        } else {
            // Dense path: copy straight into a freshly allocated dense host block.
            matrixBlock = new MatrixBlock(toIntExact(this.mat.getNumRows()), toIntExact(this.mat.getNumColumns()), false);
            matrixBlock.allocateDenseBlock();
            LibMatrixCUDA.cudaSupportFunctions.deviceToHost(getGPUContext(), getDensePointer(), matrixBlock.getDenseBlockValues(), str, z);
            if (z2) {
                clearData(str, true);
            }
            matrixBlock.recomputeNonZeros();
        }
        this.mat.acquireModify(matrixBlock);
        this.mat.release();
        if (DMLScript.STATISTICS && !z) {
            long nanoTime2 = System.nanoTime() - nanoTime;
            // Counts 1 transfer for dense data, 3 otherwise.
            // NOTE(review): when z2 is set, clearData has already nulled the dense
            // pointer, so this evaluates to 3 even for the dense path.
            int i = !isDensePointerNull() ? 1 : 3;
            GPUStatistics.cudaFromDevTime.add(nanoTime2);
            GPUStatistics.cudaFromDevCount.add(i);
        }
        // Host and device are in sync again.
        this.dirty = false;
    }

    /**
     * Releases all device data held by this object, resets its locks and
     * removes it from the context's memory manager.
     *
     * @param str label forwarded to the free helper (presumably the instruction name)
     * @param z   eager flag forwarded to the free/deallocate calls
     * @throws DMLRuntimeException if freeing fails
     */
    public void clearData(String str, boolean z) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : clearData on " + this + ", GPUContext=" + getGPUContext());
        }

        // Free whichever representations exist.
        boolean hasDense = !isDensePointerNull();
        if (hasDense) {
            getGPUContext().cudaFreeHelper(str, getDensePointer(), z);
        }
        boolean hasSparse = getJcudaSparseMatrixPtr() != null;
        if (hasSparse) {
            getJcudaSparseMatrixPtr().deallocate(z);
        }

        // Drop all references and bookkeeping.
        clearDensePointer();
        shadowBuffer.clearShadowPointer();
        jcudaSparseMatrixPtr = null;
        resetReadWriteLock();
        getGPUContext().getMemoryManager().removeGPUObject(this);
    }

    /** @return the sparse (CSR) pointer, or {@code null} if none is set */
    public CSRPointer getJcudaSparseMatrixPtr() {
        return jcudaSparseMatrixPtr;
    }

    /** @return the current value of the dirty flag */
    public boolean isDirty() {
        return dirty;
    }

    /**
     * Builds a diagnostic description with lock state, sparsity flag, dimensions
     * and whichever pointers are set.
     *
     * <p>The output previously started with a stray separator
     * ({@code "GPUObject{, dirty=..."}); the first field is now emitted without
     * the leading comma.
     *
     * @return human-readable state of this object
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("GPUObject{");
        sb.append("dirty=").append(this.dirty);
        sb.append(", readLocks=").append(this.readLocks.longValue());
        sb.append(", writeLock=").append(this.writeLock);
        sb.append(", sparse? ").append(this.isSparse);
        sb.append(", dims=[").append(this.mat.getNumRows()).append(",").append(this.mat.getNumColumns()).append("]");
        // The dense pointer is read via the getter only when it is already set,
        // so building the string never triggers a shadow-buffer move.
        if (!isDensePointerNull()) {
            sb.append(", densePtr=").append(getDensePointer());
        }
        if (this.jcudaSparseMatrixPtr != null) {
            sb.append(", sparsePtr=").append(this.jcudaSparseMatrixPtr);
        }
        sb.append('}');
        return sb.toString();
    }
}
