package beasts;

import java.io.Serializable;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:beasts/RLearner.class */
/**
 * Tabular Q-learner: keeps one Q-value per (state, action) pair, picks actions from
 * those values and updates them with the one-step Q-learning rule.
 *
 * <p>The action-selection strategy is fixed at compile time by {@link #ACTION_SELECTION}
 * (currently {@link #SOFTMAX}). Instances are serializable; the field names and
 * {@code serialVersionUID} are kept stable so previously saved learners still load.
 */
public class RLearner implements Serializable {
    static final long serialVersionUID = 1541947838151220168L;

    /** Strategy id: greedy action, with a random action once in {@link #EPSILON} draws. */
    private static final int E_GREEDY = 1;
    /** Strategy id: sample an action with probability proportional to {@code exp(Q)}. */
    private static final int SOFTMAX = 2;
    /** Strategy used by {@link #selectAction}. */
    private static final int ACTION_SELECTION = SOFTMAX;
    /** Bound passed to {@code BeastsWorld.rnd(int)}; a result of 0 triggers exploration. */
    private static final int EPSILON = 10;
    /** Learning rate of the Q-update. */
    private static final double ALPHA = 0.1d;
    /** Discount factor applied to the best next-state value. */
    private static final double GAMMA = 0.9d;

    /** Q-table indexed as {@code qvals[state][action]}. */
    private double[][] qvals;
    /** Value every table entry was initialised with (kept as part of the serialized form). */
    private int initial_q_val;

    /**
     * Creates a learner whose Q-table is filled with a constant.
     *
     * @param i  number of states (first table dimension)
     * @param i2 number of actions per state (second table dimension)
     * @param i3 initial value stored in every table entry
     */
    public RLearner(int i, int i2, int i3) {
        this.qvals = new double[i][i2];
        this.initial_q_val = i3;
        for (int state = 0; state < i; state++) {
            for (int action = 0; action < i2; action++) {
                setQ(state, action, i3);
            }
        }
    }

    /**
     * Returns the index of the largest element.
     *
     * <p>The scan runs from the last index down and only replaces the candidate on a
     * strictly greater value, starting from index 0 as the initial candidate. Ties
     * therefore resolve to index 0 when it holds the maximum, otherwise to the highest
     * tied index. This order is kept deliberately so greedy choices stay unchanged.
     *
     * @param dArr non-empty array of values
     * @return index of a maximal element
     * @throws ArrayIndexOutOfBoundsException if {@code dArr} is empty
     */
    private static int max(double[] dArr) {
        int best = 0;
        double bestValue = dArr[0];
        // Index 0 is the initial candidate and can never beat itself, so stop at 1.
        for (int idx = dArr.length - 1; idx >= 1; idx--) {
            if (dArr[idx] > bestValue) {
                bestValue = dArr[idx];
                best = idx;
            }
        }
        return best;
    }

    /** Returns the live (not copied) row of Q-values for state {@code i}. */
    private double[] getQs(int i) {
        return this.qvals[i];
    }

    /** Stores {@code d} as the Q-value of action {@code i2} in state {@code i}. */
    private void setQ(int i, int i2, double d) {
        this.qvals[i][i2] = d;
    }

    /**
     * Chooses an action for state {@code i} using the configured strategy.
     *
     * @param beastsWorld source of random numbers ({@code rnd()} / {@code rnd(int)})
     * @param i           state index
     * @return index of the chosen action; {@code -1} only if the state has no actions
     *         under the softmax strategy
     * @throws RuntimeException if {@link #ACTION_SELECTION} names no known strategy
     */
    public int selectAction(BeastsWorld beastsWorld, int i) {
        double[] qs = getQs(i);
        switch (ACTION_SELECTION) {
            case E_GREEDY:
                return selectEpsilonGreedy(beastsWorld, qs);
            case SOFTMAX:
                return selectSoftmax(beastsWorld, qs);
            default:
                throw new RuntimeException("RLearner.selectMove(UNKNOWN)");
        }
    }

    /** Epsilon-greedy: random action when {@code rnd(EPSILON)} is 0, else the greedy one. */
    private static int selectEpsilonGreedy(BeastsWorld beastsWorld, double[] qs) {
        if (beastsWorld.rnd(EPSILON) == 0) {
            return beastsWorld.rnd(qs.length);
        }
        return max(qs);
    }

    /**
     * Softmax: samples an action with probability proportional to {@code exp(Q)}.
     *
     * <p>Exactly one {@code rnd()} draw is consumed per call.
     * NOTE(review): assumes {@code BeastsWorld.rnd()} is uniform in [0, 1) - confirm.
     */
    private static int selectSoftmax(BeastsWorld beastsWorld, double[] qs) {
        int count = qs.length;
        double[] weights = new double[count];
        double total = 0.0d;
        if (count > 0) {
            // Shift by the row maximum before exponentiating: the distribution is
            // unchanged, but exp() can no longer overflow to Infinity (or underflow to 0
            // for every action), which used to turn all probabilities into NaN.
            double peak = qs[max(qs)];
            for (int action = 0; action < count; action++) {
                double weight = Math.exp(qs[action] - peak);
                weights[action] = weight;
                total += weight;
            }
        }

        double draw = beastsWorld.rnd();
        double cumulative = 0.0d;
        int fallback = -1;
        for (int action = 0; action < count; action++) {
            if (weights[action] <= 0.0d) {
                continue; // zero-probability actions are never selected
            }
            // Remember the last selectable action: if rounding leaves the cumulative sum
            // slightly below the draw, return it instead of an invalid index.
            fallback = action;
            cumulative += weights[action] / total;
            // Half-open buckets [lo, hi): a draw of exactly 0.0 or exactly on a bucket
            // boundary still maps to an action (the old strict two-sided test did not).
            if (draw < cumulative) {
                return action;
            }
        }
        return fallback;
    }

    /**
     * Applies the one-step Q-learning update
     * {@code Q(s,a) += ALPHA * (r + GAMMA * max Q(s',.) - Q(s,a))}.
     *
     * @param i  state in which the action was taken
     * @param i2 action that was taken
     * @param i3 reward received
     * @param i4 resulting state, or a negative value when there is no successor state
     *           (the future value is then taken as 0)
     */
    public void giveReward(int i, int i2, int i3, int i4) {
        double current = getQs(i)[i2];
        double bestNext = 0.0d;
        if (i4 >= 0) {
            double[] nextQs = getQs(i4);
            bestNext = nextQs[max(nextQs)];
        }
        setQ(i, i2, current + (ALPHA * ((i3 + (GAMMA * bestNext)) - current)));
    }
}
