O uso do RSA para multiplicação modular leva a erros no Java Card

Olá, estou trabalhando em um projeto em Java Card que exige muitas multiplicações modulares. Consegui implementar a multiplicação modular nesta plataforma usando o criptossistema RSA, mas ela parece funcionar apenas para determinados números.

public byte[] modMultiply(byte[] x, short xOffset, short xLength, byte[] y,
        short yOffset, short yLength, short tempOutoffset) {

    //copy x value to temporary rambuffer
    Util.arrayCopy(x, xOffset, tempBuffer, tempOutoffset, xLength);

    // copy the y value to match th size of rsa_object
    Util.arrayFillNonAtomic(eempromTempBuffer, (short)0, (byte) (Configuration.LENGTH_RSAOBJECT_MODULUS-1),(byte)0x00);
    Util.arrayCopy(y,yOffset,eempromTempBuffer,(short)(Configuration.LENGTH_RSAOBJECT_MODULUS - yLength),yLength);

    // x+y
    if (JBigInteger.add(x,xOffset,xLength, eempromTempBuffer,
            (short)0,Configuration.LENGTH_MODULUS)) ;
    if(this.isGreater(x, xOffset, xLength, tempBuffer,Configuration.TEMP_OFFSET_MODULUS, Configuration.LENGTH_MODULUS)>0)
        JBigInteger.subtract(x,xOffset,xLength, tempBuffer,
                Configuration.TEMP_OFFSET_MODULUS, Configuration.LENGTH_MODULUS);

    mRsaCipherForSquaring.init(mRsaPublicKekForSquare, Cipher.MODE_ENCRYPT);

    mRsaCipherForSquaring.doFinal(x, xOffset, Configuration.LENGTH_RSAOBJECT_MODULUS, x,
            xOffset); // OK

    mRsaCipherForSquaring.doFinal(tempBuffer, tempOutoffset, Configuration.LENGTH_RSAOBJECT_MODULUS, tempBuffer, tempOutoffset); // OK

    if (JBigInteger.subtract(x, xOffset, Configuration.LENGTH_MODULUS, tempBuffer, tempOutoffset,
            Configuration.LENGTH_MODULUS)) {
        JBigInteger.add(x, xOffset, Configuration.LENGTH_MODULUS, tempBuffer,
                Configuration.TEMP_OFFSET_MODULUS, Configuration.LENGTH_MODULUS);

    mRsaCipherForSquaring.doFinal(eempromTempBuffer, yOffset, Configuration.LENGTH_RSAOBJECT_MODULUS, eempromTempBuffer, yOffset); //OK 

    if (JBigInteger.subtract(x, xOffset, Configuration.LENGTH_MODULUS, eempromTempBuffer, yOffset,
            Configuration.LENGTH_MODULUS)) {

        JBigInteger.add(x, xOffset, Configuration.LENGTH_MODULUS, tempBuffer,
                Configuration.TEMP_OFFSET_MODULUS, Configuration.LENGTH_MODULUS);

    // ((x+y)^2 - x^2 -y^2)/2
    JBigInteger.modular_division_by_2(x, xOffset,Configuration. LENGTH_MODULUS, tempBuffer, Configuration.TEMP_OFFSET_MODULUS, Configuration.LENGTH_MODULUS);
    return x;

public static boolean add(byte[] x, short xOffset, short xLength, byte[] y,
        short yOffset, short yLength) {
    short digit_mask = 0xff;
    short digit_len = 0x08;
    short result = 0;
    short i = (short) (xLength + xOffset - 1);
    short j = (short) (yLength + yOffset - 1);

    for (; i >= xOffset; i--, j--) {
        result = (short) (result + (short) (x[i] & digit_mask) + (short) (y[j] & digit_mask));

        x[i] = (byte) (result & digit_mask);
        result = (short) ((result >> digit_len) & digit_mask);
    while (result > 0 && i >= xOffset) {
        result = (short) (result + (short) (x[i] & digit_mask));
        x[i] = (byte) (result & digit_mask);
        result = (short) ((result >> digit_len) & digit_mask);

    return result != 0;
public static boolean subtract(byte[] x, short xOffset, short xLength, byte[] y,
        short yOffset, short yLength) {
    short digit_mask = 0xff;
    short i = (short) (xLength + xOffset - 1);
    short j = (short) (yLength + yOffset - 1);
    short carry = 0;
    short subtraction_result = 0;

    for (; i >= xOffset && j >= yOffset; i--, j--) {
        subtraction_result = (short) ((x[i] & digit_mask)
                - (y[j] & digit_mask) - carry);
        x[i] = (byte) (subtraction_result & digit_mask);
        carry = (short) (subtraction_result < 0 ? 1 : 0);
    for (; i >= xOffset && carry > 0; i--) {
        if (x[i] != 0)
            carry = 0;
        x[i] -= 1;

    return carry > 0;

 public short isGreater(byte[] x,short xOffset,short xLength,byte[] y ,short yOffset,short yLength)
        if(xLength > yLength)
            return (short)1;
        if(xLength < yLength)
            return (short)(-1);
        short digit_mask = 0xff;
        short digit_len = 0x08;
        short result = 0;
        short i = (short) (xLength + xOffset - 1);
        short j = (short) (yLength + yOffset - 1);

        for (; i >= xOffset; i--, j--) {
            result = (short) (result + (short) (x[i] & digit_mask) - (short) (y[j] & digit_mask));
            if(result > 0)
                return (short)1;
            if(result < 0)
                return (short)-1;
        return 0;

O código funciona bem para números pequenos, mas falha para números maiores.

