public class ITD extends SNA {

/* This class, Insider Threat Detection (ITD), provides a system to
   detect database members (insiders) who are extracting data from the
   database for unauthorized or illegal purposes, and/or with the
   intention of giving such data to those who want to act against the
   organization running the database.

   A member issues a "query" against the database and receives a
   "query result."  The pair (query, query result) is called a
   "transaction."  There are "m" members of the federated WTS
   intelligence database. */

/* Run switches read by "manage_()":
     simulate         - when training, build the data set with
                        "simQueryResult_()" instead of reading it with
                        "datacq_()".
     train            - fit the modular neural network (MNN).
     checkqueryresult - run "detect_()" on the newest transaction. */

static boolean simulate = true, train = false, checkqueryresult = false;

/* Input file names: "queryresultsfle" is opened by "datacq_()" and
   "newqueryresultfle" is opened by "detect_()". */

static String queryresultsfle = "none", newqueryresultfle = "none";
 
/* nmmembers   - number of members; set to 2 by "simQueryResult_()" or to
                 the largest member number read by "datacq_()".
   member      - member number of the transaction being tested; set by
                 "detect_()" and also used as the loop variable in
                 "simQueryResult_()".
   nmatts      - number of attributes carried by each query result.
   samplesize  - number of observations (transactions) in the data set.
   nminputvars - number of MNN input variables; "manage_()" sets it to
                 3 * nmatts.
   datflenm    - file number handed to "fleopen_()", "fgetint_()" and
                 "checkbuffeof_()". */

static int nmmembers = 0, member, nmatts = 2, samplesize, nminputvars,
   datflenm = 4;

/* TNMNDS is the maximum number of WTS players.
   NOTE(review): in this class TNMNDS also sizes "nmuniquelabels" (indexed
   by attribute) and the entity dimension of "attdat" and "newatt"
   (indexed by entity within a query result) -- confirm that it is large
   enough for both uses. */

// n[i]: number of entities in query result "i".
static int n[] = new int[DATSZE];
// index[]: filled with 1, 2, ... in "detect_()" and passed to the sort
// alongside "znormdat".
static int index[] = new int[DATSZE];
// transactionauthor[i]: member number that issued transaction "i".
static int transactionauthor[] = new int[DATSZE];
// nmdbentities[k]: number of database entities associated with attribute "k".
static int nmdbentities[] = new int[10];
// nmax[m-1]: desired largest step between successive simulated index
// values for member "m" (simulation only).
static int nmax[] = new int[TNMNDS];
// simqueryresultsize[m-1]: simulated query result size for member "m".
static int simqueryresultsize[] = new int[TNMNDS];
// nmuniquelabels[k]: number of unique labels of attribute "k".
static int nmuniquelabels[] = new int[TNMNDS];

// attdat[i][k][j]: value of attribute "k" for entity "j" in query result "i".
static double attdat[][][] = new double[DATSZE][2][TNMNDS];
// obsqueryresult[i][]: MNN input vector built from query result "i" by
// "summarizeQueryResult_()".
static double obsqueryresult[][] = new double[DATSZE][ITDmodnn.MAXP];
// newqueryresult[]: MNN input vector built in "detect_()" for the
// transaction under test.
static double newqueryresult[] = new double[ITDmodnn.MAXP];
// znormdat[]: in-sample "znorm" values collected in "detect_()".
static double znormdat[] = new double[DATSZE];
// newatt[k][j]: value of attribute "k" for entity "j" in the query result
// under test.
static double newatt[][] = new double[2][TNMNDS];

// --------------------------------------------------------------------

static void manage_() {

/* Top-level driver for the ITD.  Fixes the database constants and then,
   depending on the "train" and "checkqueryresult" switches, fits the
   modular neural network (MNN) to a set of query results and/or tests
   the newest transaction for an insider threat. */

int obs, hiddenGuess;
String line;

/* Number of entities in the database that are associated with each of
   the "nmatts" attributes. */

// !!!!!!!!!!!!!!!!!!!!!!!!!! Add code for continuously-valued attributes.

nmdbentities[0] = 300;
nmdbentities[1] = 300;

/* Number of unique labels of each nominally- or ordinally-valued
   attribute. */

nmuniquelabels[0] = 100;
nmuniquelabels[1] = 75;

// Three input variables (median, IQR, relative size) per attribute.

nminputvars = 3 * nmatts;

if (train) {

   // Obtain the training transactions, either simulated or read from file.

   if (!simulate) {
      datacq_();

   } else {
      simQueryResult_();
   }

   // Convert every query result into an MNN input vector.

   summarizeQueryResult_();

   // Constants of the modular neural network.

   Dfpmin.nmnetp = nminputvars;
   hiddenGuess = (int) Math.round((double) (0.2 * Dfpmin.nmnetp));
   Dfpmin.nmhid = Math.max(hiddenGuess, 1);

   /* NOTE(review): the value computed above is immediately replaced by 1.
      Kept as in the original; confirm whether the override is intended. */

   Dfpmin.nmhid = 1;
   Dfpmin.k = nmmembers;

   ITDmodnn.netfit_(true, obsqueryresult, transactionauthor, samplesize);

   /* Report the in-sample prediction for every observation.  The
      prediction is requested before the "znorm" entries are read, so the
      printed values are those left by this prediction. */

   obs = 0;
   while (obs < samplesize) {
      line = "manage: datum= " + (obs + 1) + " member= " +
         transactionauthor[obs];
      line += " predicted member= " +
         ITDmodnn.predict_member_(obsqueryresult[obs]);
      line += " znorm1= " + ITDmodnn.znorm[0] + " znorm2= " +
         ITDmodnn.znorm[1];
      printf_(line);
      ++obs;
   }
}

if (checkqueryresult) {

   // "detect_()" sets "member" before any of the messages below is built.

   switch (detect_()) {
      case 1:
         printf_("detect test1: member " + member + " is an insider threat.");
         break;

      case 2:
         printf_("detect test2: member " + member + " is an insider threat.");
         break;

      default:
         printf_("detect: member " + member + " is currently not a threat.");
         break;
   }
}
}

// --------------------------------------------------------------------

static void summarizeQueryResult_() {

/* Builds the MNN training data set.  Every query result contributes
   three consecutive input variables per attribute "att":

      slot 3*att     - sample median / number of unique labels
      slot 3*att + 1 - sample IQR (75% minus 25% quantile) / number of
                       unique labels
      slot 3*att + 2 - query result size / number of database entities

   The first three values of each observation are printed. */

int obs, att, slot;
double[] features;

for (obs = 0; obs < samplesize; ++obs) {
   features = obsqueryresult[obs];

   for (att = 0; att < nmatts; ++att) {
      slot = 3 * att;

      // Normalized sample median.

      features[slot] =
         Summry.quantile_(n[obs], attdat[obs][att], 0.5) /
         ((double) nmuniquelabels[att]);

      // Normalized sample IQR; upper quantile is requested first.

      features[slot + 1] =
         (Summry.quantile_(n[obs], attdat[obs][att], 0.75) -
          Summry.quantile_(n[obs], attdat[obs][att], 0.25)) /
         ((double) nmuniquelabels[att]);

      // Relative size of the query result.

      features[slot + 2] = ((double) n[obs]) / ((double) nmdbentities[att]);
   }

   printf_("summarizeQueryResult: member= " + transactionauthor[obs] + " " +
      obsqueryresult[obs][0] + " " +
      obsqueryresult[obs][1] + " " +
      obsqueryresult[obs][2]);
}
}

// --------------------------------------------------------------------

static void datacq_() {

/* Reads in the query results of member-issued queries against the
   database from the file "queryresultsfle".

   On return:
      n[i]                 - number of entities in query result "i"
      attdat[i][k][j]      - value of attribute "k" for entity "j"
      transactionauthor[i] - member number that issued transaction "i"
      nmmembers            - largest member number read
      samplesize           - number of observations read

   If a transaction does not result in a threat detection, that
   transaction's query result is added to a member-query result data set.

   The ITD MNN is refitted to this data set every time 100 new,
   non-threat transactions have been added to the data set.

   Data addition and re-fitting decisions and tasks are directed by the
   JAVA program, "ITD.manage_()" that is called by a "Start-Process"
   command in the PowerShell script, "fedquery.ps1" every time a query is
   issued against the federated WTS database.

   The routine assumes there are exactly "nmatts" attributes.  The
   index "obs" tracks the observation (a member's transaction), the
   index "k" tracks the attribute within the transaction, and the
   index "j" tracks the entity associated with the attribute within a
   member's query result.

   The i^th observation in the data file is structured as follows.

   member#_i n[i]
   att1_1   att2_1
     .        .
     .        .
     .        .
   att1_j   att2_j
     .        .
     .        .
     .        .
   att1_n_i att2_n_i

   Errors are reported through "iderr_()".  Because this file does not
   show whether "iderr_()" returns, reading also stops after a capacity
   error so that the arrays are never indexed out of range. */

int author, obs = 0, j, k;

// Read query results.

fleopen_(datflenm, queryresultsfle, 'r');
nmmembers = 0;
do {

   // Refuse to read more observations than the arrays can hold.

   if (obs >= transactionauthor.length) {
      iderr_("datacq: too many observations, capacity= " +
               transactionauthor.length);
      break;
   }

   author = fgetint_(datflenm);
   n[obs] = fgetint_(datflenm);

   // The entity count must fit the entity dimension of "attdat".

   if (n[obs] < 0 || n[obs] > attdat[obs][0].length) {
      iderr_("datacq: observation= " + (obs + 1) + " n= " + n[obs] +
               " capacity= " + attdat[obs][0].length);
      break;
   }

   for (j = 0; j < n[obs]; ++j) {
      for (k = 0; k < nmatts; ++k) {
         attdat[obs][k][j] = (double) fgetint_(datflenm);

         if (attdat[obs][k][j] > (double) nmdbentities[k]) {
            iderr_("datacq: attdat= " + attdat[obs][k][j] +
                     " nmdbentities= " + nmdbentities[k]);
         }
      }
   }
   transactionauthor[obs] = author;

   // The number of members is the largest member number seen.

   if (author > nmmembers) {
      nmmembers = author;
   }
   ++obs;
} while (!checkbuffeof_(datflenm));

// Close the same file number that was opened above.

fclose_(datflenm, 'r');
samplesize = obs;
}

// --------------------------------------------------------------------

static int detect_() {

/* Checks the newest database transaction, read from "newqueryresultfle",
   to see if its author is a member who has become an Insider Threat.

   ITD rule as implemented here.

1. Convert the new query result into an MNN input vector and predict who
   issued the query.  If this prediction is not "member," declare that
   "member" has become an insider threat (return 1).

2. Otherwise, take "znew," the "znorm" value of "member" from that
   prediction.

3. Collect the "znorm" value of "member" for every in-sample query
   result of "member" that the MNN attributes to "member."  These values
   define the member's "znorm" empirical distribution.

4. Sort these values and take the "smallprob" (currently 10%) sample
   percentile as the threshold.

5. If "znew" is smaller than the threshold, declare that "member" has
   become an insider threat (return 2).  Otherwise return 0.

NOTE: This rule is valid across different federation sizes ("m").
      And, it adjusts for different levels of distinctiveness of
      transactions across these members.

   Returns 1 (test 1 fired), 2 (test 2 fired) or 0 (no threat found).
   Sets the static "member" to the member number read from the file. */

int i, j, k, nnew, predmember, nmmemberobs, percentilenval;

double memberznormval, smallprob = .1;

// Read-in member's query result and convert it to an MNN input vector.

fleopen_(datflenm, newqueryresultfle, 'r');
member = fgetint_(datflenm);
nnew = fgetint_(datflenm);

/* The entity count must fit the entity dimension of "newatt".  Reported
   in the same way as the range errors in "datacq_()"; "iderr_()" is
   presumed to deal with the failure -- TODO confirm. */

if (nnew < 0 || nnew > newatt[0].length) {
   iderr_("detect: nnew= " + nnew + " capacity= " + newatt[0].length);
}

for (j = 0; j < nnew; ++j) {
   for (k = 0; k < nmatts; ++k) {
      newatt[k][j] = fgetint_(datflenm);
   }
}

// Close the same file number that was opened above.

fclose_(datflenm, 'r');

/* Build the input vector with exactly the scaling that
   "summarizeQueryResult_()" applies to the training data: median and
   IQR are divided by the number of unique labels, the size by the
   number of database entities.  A different scaling here would present
   the MNN with inputs unlike those it was fitted to. */

j = 0;
for (k = 0; k < nmatts; ++k) {

   // Sample median.

   newqueryresult[j] = Summry.quantile_(nnew, newatt[k], 0.5);
   newqueryresult[j] /= ((double) nmuniquelabels[k]);
   ++j;

   // Sample IQR.

   newqueryresult[j] =
      Summry.quantile_(nnew, newatt[k], 0.75) -
      Summry.quantile_(nnew, newatt[k], 0.25);
   newqueryresult[j] /= ((double) nmuniquelabels[k]);
   ++j;

   // Size.

   newqueryresult[j] = ((double) nnew) / ((double) nmdbentities[k]);
   ++j;
}

/* See if the MNN predicts the read-in member as the author of the
   query that produced the read-in query result. */

predmember = ITDmodnn.predict_member_(newqueryresult);
for (i = 0; i < nmmembers; ++i) {
   printf_("detect: member= " + (i + 1) + " znorm= " + ITDmodnn.znorm[i]);
}

// Test 1: the predicted author differs from the claimed author.

if (predmember != member) {

   return 1;
}

/* Save "znew" now; the in-sample predictions below reuse
   "ITDmodnn.znorm". */

memberznormval = ITDmodnn.znorm[member - 1];

/* Compute the member's "znorm" for each observation in that member's
   query results data set.  Only observations that the MNN attributes
   to the member are used. */

printf_("\n detect: Predict member using their query results.");
nmmemberobs = 0;
for (i = 0; i < samplesize; ++i) {
   if (transactionauthor[i] != member) {
      continue;
   }

   predmember = ITDmodnn.predict_member_(obsqueryresult[i]);

   if (predmember == member) {
      printf_("detect: member datum= " + i + " znormmember1= " +
         ITDmodnn.znorm[0] + " znormmember2= " + ITDmodnn.znorm[1]);

      znormdat[nmmemberobs] = ITDmodnn.znorm[predmember - 1];

      index[nmmemberobs] = nmmemberobs + 1;
      ++nmmemberobs;
   }
}

/* Sort "znormdat" (presumably into ascending order -- confirm against
   "Idsort.shellsort_") and then find the position of the "smallprob"
   percentile, "percentilenval" (1-based, never less than 1).

   NOTE(review): when "nmmemberobs" is 0 the threshold below is whatever
   "znormdat[0]" already holds; decide how a member without any usable
   in-sample observation should be treated. */

Idsort.shellsort_(znormdat, index, nmmemberobs);
percentilenval = (int) Math.round(smallprob * ((double) nmmemberobs));

if (percentilenval == 0) {
   percentilenval = 1;
}

/* Test 2: If the computed "znorm" value on this new query result is
   smaller than the threshold value, "znormdat[percentilenval - 1],"
   conclude that this member may have become an insider threat. */

printf_(" memberznormval= " + memberznormval +
        " znormdat[percentilenval-1]= " + znormdat[percentilenval - 1]);

if (memberznormval < znormdat[percentilenval - 1]) {

   return 2;
}

return 0;
}

// --------------------------------------------------------------------

static void simQueryResult_() {

/* Simulates a query result data set for two members.  Each member
   issues 40 transactions; every transaction holds, for each attribute,
   a non-decreasing run of index values whose length is the member's
   query result size.  In this version, the query result size is the
   same for every attribute and every transaction of a member.

   Sets "nmmembers", "simqueryresultsize", "nmax", "transactionauthor",
   "attdat", "n" and "samplesize".  The static "member" is used as the
   loop variable and is left at "nmmembers + 1". */

int obs = 0, rep, att, pos, start, widestStep, step, jump, previous,
   querySize;

final int repsPerMember = 40;

/* Define a situation: number of members, size of each member's query
   results, spacing between ordered attribute value index values. */

nmmembers = 2;
simqueryresultsize[0] = 5;
simqueryresultsize[1] = 20;
nmax[0] = 2;
nmax[1] = 15;

// Perform the simulation.

for (member = 1; member <= nmmembers; ++member) {
   querySize = simqueryresultsize[member - 1];

   for (rep = 0; rep < repsPerMember; ++rep, ++obs) {
      transactionauthor[obs] = member;

      for (att = 0; att < nmatts; ++att) {

         /* Random starting index value, drawn with an upper limit of one
            fifth of this attribute's total number of entities. */

         start = Rndm.dscrtunif_(0, 1, (nmdbentities[att] / 5));

         /* Largest step that still keeps "querySize" steps inside the
            attribute's entity range; the step actually allowed is the
            smaller of this and the member's desired step. */

         widestStep = (nmdbentities[att] - start) / querySize;
         step = Math.min(nmax[member - 1], widestStep);

         /* Simulate the index values.  Element 0 is first set to the
            starting index and then replaced by the first pass of the
            loop, exactly as in the original routine. */

         attdat[obs][att][0] = (double) start;
         previous = start;
         for (pos = 0; pos < querySize; ++pos) {
            jump = Rndm.dscrtunif_(0, 1, step);
            previous += jump;
            attdat[obs][att][pos] = (double) previous;

            if (previous > nmdbentities[att]) {
               iderr_("simQueryResult: i= " + obs + " k= " + att +
                  " lastindex= " + previous + " nmdbentities= " +
                  nmdbentities[att]);
            }
         }
      }
      n[obs] = querySize;
   }
}
samplesize = obs;
printf_("simQueryResult: samplesize= " + samplesize);
}
}
