import java.io.*;
import org.jsoup.Jsoup;

import java.rmi.RemoteException;
import java.util.*;
import java.util.concurrent.TimeUnit;

import org.openspaces.core.GigaSpace;
import org.openspaces.core.GigaSpaceConfigurer;
import org.openspaces.core.space.EmbeddedSpaceConfigurer;
import org.openspaces.core.space.SpaceProxyConfigurer;

import java.util.Arrays;
import java.util.Properties;

// ---------------------------------------------------------------------

class Storyparse extends Intridslve {

static boolean sourcefound = false, nodate = false, nogroup = false,
   noaction = false, noregion = false, nocountry = false,
   sequentialrun = false, accuracycheck = false, learnmode = false;

static char wordchars[] = new char[200];
static char actorplotsymbol[] = new char[NMACTNS];

static String source = "none", htmlfle, outfle,
   maincountry = "none", parsedematactsfle, soupstring, articletext,
   dboption = "none";

static String storiesfle[] = new String[10];
static String parsedstoriesfle[] = new String[10];
static String checkfle[] = new String[10];

static String vernacgrp[] = new String[MAXNMGRPS];
static String grparchetype[] = new String[MAXNMGRPS];
static String acronym[] = new String[MAXNMGRPS];
static String modelname[] = new String[MAXNMGRPS];
static String sentence[][] = new String[NMSENS][NMWRDS];
static String textsentence[][] = new String[NMSENS][NMWRDS];
static String country[] = new String[NMRGNS];
static String ctryrgn[][] = new String[NMRGNS][2];
static String actor[][] = new String[NMACTNS][20];
static String action[] = new String[NMACTNS];
static String subject[][] = new String[NMACTNS][20];
static String region[] = new String[NMRGNS];
static String actreg[] = new String[NMACTNS];
static String dayname[] = {"Sunday", "Sun", "Monday", "Mon",
	                   "Tuesday", "Tues", "Wednesday", "Wed",
                           "Thursday", "Thurs", "Friday", "Fri",
			   "Saturday", "Sat"};

static String monthname[] = {"January", "Jan", "jan",
	                     "February", "Feb", "feb",
                             "March", "Mar", "mar",
			     "April", "Apr", "apr",
	                     "May", "May", "may",
			     "June", "Jun", "jun",
			     "July", "Jul", "jul",
			     "August", "Aug", "aug",
			     "September", "Sept", "sept",
			     "October", "Oct", "oct",
			     "November", "Nov", "nov",
			     "December", "Dec", "dec"};
static String abbrev[][] = new String[50][2];
static String group[][] = new String[MAXNMGRPS][50];
static String grparche[] = new String[MAXNMGRPS];
static String ematactphrase[] = new String[MAXNMEMAT];
static String ematid[] = new String[MAXNMEMAT];
static String ematmwverb[][][] = new String[MAXNMEMAT][NMEMATOBJS][10];
static String ematdirobj[][][] = new String[MAXNMEMAT][NMEMATOBJS][10];
static String ematprepobj[][][] = new String[MAXNMEMAT][NMEMATOBJS][10];
static String mwordverb[] = new String[10];
static String dirobjphrase[] = new String[10];
static String prepobjphrase[] = new String[10];

static String dbentity[] = new String[NMSENS];

static int articleid, nmvernacgrps, nmparsed = 0, nmstoriesfiles,
   nmstories = 0, nmstoryactions, nmallregions = 1, nmactions,
   nmrawactions = 0, nmregionsttl, nmcountriesttl, nmnodate = 0,
   nmnogroup = 0, nmnoaction = 0, nmnocountry = 0, nmnoregion = 0,
   nmduplicateactions = 0, nmsentences, nmtextsentences,
   dbentitysize = 0, nmlearn = 0, storyday = 0, storymonth = 0,
   grpactnsflenm = 3, dbcreateflenm = 6, learnflenm = 7, checkflenm = 8;

/* Set phrase-matching threshold and the learning threshold.  Perfect overall
   similarity is 1.0. */

static double matchthrshld = .98;
static double learnthrshld = .8;

static int nmsenwrds[] = new int[NMSENS];
static int nmtextsenwrds[] = new int[NMSENS];
static int ematindx[] = new int[NMACTNS];
static int index[] = new int[NMRGNS];
static int nmactors[] = new int[NMACTNS];
static int nmsubjects[] = new int[NMACTNS];
static int nmaliases[] = new int[MAXNMGRPS];

static int nmmwverbs[] = new int[MAXNMEMAT];
static int nmdirobjs[] = new int[MAXNMEMAT];
static int nmprepobjs[] = new int[MAXNMEMAT];

static int mwverbnmwrds[][] = new int[MAXNMEMAT][NMEMATOBJS];
static int dirobjnmwrds[][] = new int[MAXNMEMAT][NMEMATOBJS];
static int prepobjnmwrds[][] = new int[MAXNMEMAT][NMEMATOBJS];

static int actmonth[] = new int[NMACTNS];
static int actday[] = new int[NMACTNS];
static int actsen[] = new int[NMACTNS];
static int nmofmentions[] = new int[MAXNMGRPS];
static int groupsen[][] = new int[MAXNMGRPS][100];
static int nmrgnmentions[] = new int[NMRGNS];
static int rgnsen[][] = new int[NMRGNS][100];
static int ctrysen[][] = new int[NMRGNS][100];
static int nmctrymentions[] = new int[NMRGNS];

static long parsetime = 0;

static double storyyear = 0., storydate = 0.;

static double actyear[] = new double[NMACTNS];
static double ardist[] = new double[NMRGNS];
static double actionsimilarity[][] = new double[MAXNMEMAT][MAXNMEMAT];

// Structure to hold unique actions.

static int rawaction_actionid[] = new int[NMACTNS];
static int rawaction_month[] = new int[NMACTNS];
static int rawaction_day[] = new int[NMACTNS];
static int rawaction_year[] = new int[NMACTNS];
static int rawaction_actmonth[] = new int[NMACTNS];
static int rawaction_actday[] = new int[NMACTNS];
static int rawaction_actyear[] = new int[NMACTNS];
static int rawaction_nmactors[] = new int[NMACTNS];
static int rawaction_nmsubjects[] = new int[NMACTNS];
static int rawaction_nmcountries[] = new int[NMACTNS];
static int rawaction_nmregions[] = new int[NMACTNS];
static String rawaction_actrstrng[] = new String[NMACTNS];
static String rawaction_sbjctstrng[] = new String[NMACTNS];
static String rawaction_action[] = new String[NMACTNS];
static String rawaction_ctrystrng[] = new String[NMACTNS];
static String rawaction_rgnstrng[] = new String[NMACTNS];

static int storyaction_month[] = new int[NMACTNS];
static int storyaction_day[] = new int[NMACTNS];
static int storyaction_year[] = new int[NMACTNS];
static int storyaction_actmonth[] = new int[NMACTNS];
static int storyaction_actday[] = new int[NMACTNS];
static int storyaction_actyear[] = new int[NMACTNS];
static int storyaction_nmactors[] = new int[NMACTNS];
static int storyaction_nmsubjects[] = new int[NMACTNS];
static int storyaction_nmcountries[] = new int[NMACTNS];
static int storyaction_nmregions[] = new int[NMACTNS];
static String storyaction_actrstrng[] = new String[NMACTNS];
static String storyaction_sbjctstrng[] = new String[NMACTNS];
static String storyaction_action[] = new String[NMACTNS];
static String storyaction_ctrystrng[] = new String[NMACTNS];
static String storyaction_rgnstrng[] = new String[NMACTNS];

static GigaSpace gspace = null;

// ---------------------------------------------------------------------

static void parse_stories_() {

/* Parse news stories.  Do this by looking for a date, source, country,
   actor, action, subject.  Perform string comparisons against libraries
   of countries, cities, districts, actors, actions, and subjects.  For
   each story, create an action entry if all categories are matched.

   Setup, always performed:
     - read the EMAT file and the pre-parsed EMAT action phrases,
     - compute the action similarity table,
     - store the standard abbreviations,
     - read the country-region table into "ctryrgn",
     - read the group information file.

   If "client" is set the method returns as soon as setup is complete.
   Otherwise, for every stories file: strip the html, parse the stories
   with impresario_() or sequential_(), remove duplicate actions, and
   write the raw actions to that file's group actions file.

   If no raw action exists after a stories file has been parsed, the
   method reports the failure, closes the files it still holds open,
   and returns without processing any remaining stories files. */

boolean writeparsedematacts = true;

String line;

int i, j, ignoredarticleid, nmrawstories = 0;

/* The value of "articleid" as written in story files is, as of
   May 25, 2023, not used.  Instead the counter is zeroed here and handed
   by value to sequential_() or impresario_(), each of which increments
   its own copy every time a story is read.

   NOTE(review): because those methods work on a parameter that shadows
   this static, the static "articleid" is not advanced by them.  The
   article id column written to the group actions file below therefore
   may not identify the originating story -- confirm intent. */

articleid = 0;

// Read EMAT file.

Storyutils.readematfle_();

/* Read-in the table of pre-parsed action pattern phrases.  Eventually,
   these entries should be created by machine parsing the "emat.dfn"
   file.  Note that phrases do not use the "_" space symbol. */

Storyutils.readparsedematacts_();
printf_("parse_stories: nmematcodes= " + nmematcodes);

if (writeparsedematacts) {

   // Update the parsed EMAT actions file.

   Storyutils.writeparsedematacts_();
}

/* Compute the maximum similarity between an m-word verb in one action's
   equivalence set and an m-word verb in another action's equivalence
   set.  Do this for all pairs of actions. */

Storyutils.actionsimilarities_();

// Store standard abbreviations.

abbrev[0][0] = "K"; abbrev[0][1] = "one thousand";
abbrev[1][0] = "M"; abbrev[1][1] = "one million";
abbrev[2][0] = "km2"; abbrev[2][1] = "squared kilometers";

// Read country-region database.

if (Groupdata.rgnsinfofle == null) {
   iderr_("parse_stories: rgnsinfofle=none");
}

/* Each record is a (country, region) pair.  A record whose region is
   "all" does not advance the index, so it is overwritten by the next
   record read. */

fleopen_(3, Groupdata.rgnsinfofle, 'r');
i = 0;
do {
   ctryrgn[i][0] = fgetstrng_(3); // country.
   ctryrgn[i][1] = fgetstrng_(3); // region within that country.
   if (!ctryrgn[i][1].equals("all")) {
      ++i;
   }
   if (i == NMRGNS) {
      iderr_("parse i=NMRGNS");
   }
} while (!checkeof_(3));
fclose_(3, 'r');
nmallregions = i;

/* Read group names in the vernacular, their acronyms, and their
   IntID model equivalent name from the "grpinfofle" file. */

nmvernacgrps = Storyutils.readgrpinfo_(2);

/* Setup is complete for a client (performer), so return to Report.report_()
   and from there to Workerg.startWork(). */

if (client) {

   return;
}

// Loop over story files.

for (i = 0; i < nmstoriesfiles; ++i) {

   // Remove html fluff.

   htmlfle = storiesfle[i];
   outfle = "dehtml.dat";
   Dehtml.removehtml_();

   /* Second pass to remove html fluff.  Embedded-font lines and lines
      shorter than three characters are dropped; everything else is
      cleaned and copied to "dum.txt". */

   fleopen_(2, outfle, 'r');
   fleopen_(3, "dum.txt", 'w');
   while (fle_ready_(2)) {
      line = fgetline_(2);

      if (line.contains("truetype;base64") || line.length() < 3) {
         continue;
      }

      // Find number of raw stories.

      if (line.contains("beginarticle")) {
         ++nmrawstories;
      }

      strng = Storyremove.removehtml_(line);
      fprintf_(3, strng);
   }
   fclose_(2, 'r');
   fclose_(3, 'w');

   printf_("parse_stories: storiesfle= " + storiesfle[i] + " nmrawstories= " +
      nmrawstories + " nmvernacgrps= " + nmvernacgrps);

   /* Open stories file and read down to the first article to be parsed.
      The id on the "beginarticle" line is parsed (so a malformed id still
      raises NumberFormatException) but is otherwise ignored. */

   fleopen_(2, "dum.txt", 'r');
   while (fle_ready_(2)) {
      line = fgetline_(2);
      if (line.contains("beginarticle")) {
         strng = line.replaceAll("beginarticle", "");
         strng = strng.replaceAll(" ", "");
         ignoredarticleid = Integer.parseInt(strng);
         break;
      }
   }

   /* Open the database-creation file, and the output file of group actions.
      Note that the processing sequence is:
          storyfile > group actions file > actions history file. */

   fleopen_(dbcreateflenm, parsedstoriesfle[i], 'w');
   fleopen_(grpactnsflenm, Groupdata.grpactnsfle[i], 'w');

   // In learning mode, open file to list learning sentences.

   fleopen_(learnflenm, "learnsens.txt", 'w');

   // Parse stories either simultaneously or sequentially.

   if (!client && space_in_use) {
      impresario_(articleid, i);

   } else {
      sequentialrun = true;
      sequential_(articleid, i);
   }

   fclose_(2, 'r'); // Close stories file.
   fclose_(dbcreateflenm, 'w'); // Close database creation file.
   fclose_(learnflenm, 'w'); // Close learning file.

   // Remove duplicate actions.

   Storyremove.removeduplicates_();

   if (nmrawactions == 0) {
      printf_("\nparse_stories: parse failed on all stories.");

      /* Give up, but first release the files that are still open: the
         accuracy-check file (opened by sequential_()) and the group
         actions file opened above. */

      if (sequentialrun && accuracycheck) {
         fclose_(checkflenm, 'w');
      }
      fclose_(grpactnsflenm, 'w');

      return;
   }

   // Append the run totals to the accuracy-check file and close it.

   if (sequentialrun && accuracycheck) {
      fprintf_(checkflenm, "\nparse_stories: nmstories= " + nmstories +
         " nmparsed= " + nmparsed + " nmrawactions= " + nmrawactions +
         "\n   nmnodate= " + nmnodate + " nmnogroup= " + nmnogroup +
         " nmnoaction= " + nmnoaction + " nmnoregion= " + nmnoregion +
         "\n   nmnocountry= " + nmnocountry + " nmduplicateactions= " +
         nmduplicateactions);

      fclose_(checkflenm, 'w');
   }

   // Write the raw actions array to the observed group actions file.

   fprintf_(grpactnsflenm,
      "begincomment\nGroup actions file built from storis file: " + storiesfle[i] +
      "\nendcomment");

   for (j = 0; j < nmrawactions; ++j) {
      fprintf_(grpactnsflenm,
         rawaction_actionid[j] + " " +
         articleid + " " +
         rawaction_month[j] + " " +
         rawaction_day[j] + " " +
         rawaction_year[j] + " " +
         rawaction_nmactors[j] + " " +
         rawaction_actrstrng[j] + " " +
         rawaction_nmsubjects[j] + " " +
         rawaction_sbjctstrng[j] + " " +
         "\n" + rawaction_action[j] + " " +
         "\n" + rawaction_nmcountries[j] + " " +
         rawaction_ctrystrng[j] + " " +
         "\n" + rawaction_nmregions[j] + " " +
         rawaction_rgnstrng[j] + " " +
         "\n" + rawaction_actmonth[j] + " " +
         rawaction_actday[j] + " " +
         rawaction_actyear[j]);
   }
   fclose_(grpactnsflenm, 'w');
} // End of loop over stories files.

}

// -------------------------------------------------------------------

static void sequential_(int articleid, int storiesfilenm) {

/* Parse stories sequentially.

   Stories are read one at a time from file unit 2, which the caller has
   already opened and positioned just past the first "beginarticle" line.
   Every story is copied to "story.txt", loaded into the sentence arrays,
   and parsed with parse_a_story_().  For a successfully parsed story a
   record is written to the database creation file and the story's
   actions are appended to the "rawaction_" arrays.

   articleid      value the story counter starts from; it is incremented
                  locally each time a "beginarticle" line is read.
   storiesfilenm  index of the current stories file; selects the entry of
                  "checkfle" used when "accuracycheck" is set. */

String line;

int ignoredarticleid, i, nmlines = 0, ttlnmtexts = 0;

// Open the accuracy-check file.  It is closed by the caller.

if (accuracycheck) {
   fleopen_(checkflenm, checkfle[storiesfilenm], 'w');
   fprintf_(checkflenm, "\n*********** " + checkfle[storiesfilenm] +
      " ***********\n");
}

do {

   /* Read article into both a "story" file and a big string.  The string
      is assembled in a StringBuilder so that long articles do not incur
      repeated copying. */

   fleopen_(4, "story.txt", 'w');
   nmlines = 0;
   StringBuilder soup = new StringBuilder(" ");
   while (fle_ready_(2)) {
      line = fgetline_(2).replace("'", "");
      fprintf_(4, line);

      /* A "beginarticle" line ends the current article.  Its id is
         parsed (a malformed id raises NumberFormatException) but is
         otherwise ignored. */

      if (line.contains("beginarticle")) {
         strng = line.replace("beginarticle", "").replace(" ", "");
         ignoredarticleid = Integer.parseInt(strng);
         ++articleid;
         break;
      }
      soup.append(' ').append(line);
      ++nmlines;
   }
   soupstring = soup.toString();
   fclose_(4, 'w');

   // Read story into "sentence" and "textsentence" arrays.

   fleopen_(4, "story.txt", 'r');
   Storyutils.readstory_(4, articleid);
   fclose_(4, 'r');

   // Skip an article that has no text sentences.

   if (nmtextsentences < 1) {
      continue;
   }
   ++ttlnmtexts;

   // Parse the story.  A return value of 1 means the story was rejected.

   if (parse_a_story_(articleid, soupstring) == 1) {
      continue;
   }

   ++nmstories;

   /* Write this story's articleid, source, date, and country to the
      story database creation file.  This is different from the actions
      file because a single story may contain multiple actions. */

   fprintf_(dbcreateflenm, "articleid " + articleid + " " +
      fdble_(storydate, 6, 2) + " " + source + " " + maincountry +
      "\n" + nmtextsentences);

   /* Now, read sentence component information from the database
      entity array that was populated by "parse_a_story_()."
      Write this information to the story database creation file. */

   for (i = 0; i < dbentitysize; ++i) {
      fprintf_(dbcreateflenm, dbentity[i]);
   }

   // Read story actions into master action array.

   if (nmstoryactions > 0) {
      ++nmparsed;
      for (i = 0; i < nmstoryactions; ++i) {

         /* The master arrays hold NMACTNS entries and fill up across
            stories, so report an overflow instead of indexing past the
            end. */

         if (nmrawactions >= NMACTNS) {
            iderr_("sequential: nmrawactions=NMACTNS");
            break;
         }

         rawaction_actionid[nmrawactions] = nmrawactions + 1;
         rawaction_month[nmrawactions] = storyaction_month[i];
         rawaction_day[nmrawactions] = storyaction_day[i];
         rawaction_year[nmrawactions] = storyaction_year[i];
         rawaction_nmactors[nmrawactions] = storyaction_nmactors[i];
         rawaction_actrstrng[nmrawactions] = storyaction_actrstrng[i];
         rawaction_nmsubjects[nmrawactions] = storyaction_nmsubjects[i];
         rawaction_sbjctstrng[nmrawactions] = storyaction_sbjctstrng[i];
         rawaction_action[nmrawactions] = storyaction_action[i];
         rawaction_nmcountries[nmrawactions] = storyaction_nmcountries[i];
         rawaction_ctrystrng[nmrawactions] = storyaction_ctrystrng[i];
         rawaction_nmregions[nmrawactions] = storyaction_nmregions[i];
         rawaction_rgnstrng[nmrawactions] = storyaction_rgnstrng[i];
         rawaction_actmonth[nmrawactions] = storyaction_actmonth[i];
         rawaction_actday[nmrawactions] = storyaction_actday[i];
         rawaction_actyear[nmrawactions] = storyaction_actyear[i];
         ++nmrawactions;
      }
   }
   printf_("sequential: articleid= " + articleid + " nmlines= " + nmlines +
      " number of story actions= " + nmstoryactions);

   // Optional accuracy check.

   if (accuracycheck) {
      printf_("sequential: nmtextsentences= " + nmtextsentences);
      Storyutils.writestory_(articleid, nmstoryactions,
         storyaction_action);
   }
} while (fle_ready_(2));

printf_("sequential: ttlnmtexts= " + ttlnmtexts);

}

// -------------------------------------------------------------------

static void impresario_(int articleid, int storiesfilenm) {

/* Parse all stories simultaneously by handing them out to the nodes of
   a cluster computer.

   Naming convention: An impresario organizes an opera production, and
   individual artisans are temporarily employed by the impresario to
   deliver the opera performance.

   Variables beginning with "ret" are returned values from an artisan,
   and variables beginning with "rec" are received values to an artisan
   sent by the impresario.

   The method works in three phases:
     1. create the space proxy on first use and drain leftover messages,
     2. read stories from file unit 2 and write one task per story into
        the space,
     3. sleep, then take one result per task from the space, write each
        accepted story to the database creation file, and append its
        actions to the "rawaction_" arrays.

   articleid      value the story counter starts from; incremented locally
                  for every "beginarticle" line read, and later replaced
                  by the article id carried in each result.
   storiesfilenm  index of the current stories file (not used here). */

String line;

int i, i1, ignoredarticleid, tasknm = 0, nmtasks = 0, nmlines = 0,
   nmcompleted = 0;

/* Remember an interrupt received while sleeping so that it can be
   restored on exit instead of being lost. */

boolean interrupted = false;

// Create a gigaspace.

if (space_in_use && gspace == null) {

   // Get a new Space object.

   gspace = new GigaSpaceConfigurer(new SpaceProxyConfigurer(
      spacename)).defaultTakeTimeout(spacetakewaittime).gigaSpace();

   if (gspace == null) {
      iderr_("impresario: gspace=null");
   }

   try {
      TimeUnit.SECONDS.sleep(shortwaittime);
   } catch (java.lang.InterruptedException ie) {
      System.out.println(ie);
      interrupted = true;
   }

   // Take (and report) up to five messages left over in the space.

   for (i = 0; i < 5; ++i) {
      MsgEntry msgTmpl = new MsgEntry();
      MsgEntry msg = (MsgEntry) gspace.take(msgTmpl);

      if (msg != null) {
         printf_("impresario: leftover Space terminate message, i= " + i +
            " message= " + msg.message);

      } else {
         printf_("impresario: null Space message, i= " + i);
         break;
      }
   }
}

// Read-and-parse loop.

do {

   /* Read article into both a "story" file and a big string.  The string
      is assembled in a StringBuilder so that long articles do not incur
      repeated copying. */

   fleopen_(4, "story.txt", 'w');
   StringBuilder soup = new StringBuilder(" ");
   nmlines = 0;
   while (fle_ready_(2)) {
      line = fgetline_(2).replace("'", "");
      fprintf_(4, line);

      /* A "beginarticle" line ends the current article.  Its id is
         parsed (a malformed id raises NumberFormatException) but is
         otherwise ignored. */

      if (line.contains("beginarticle")) {
         strng = line.replace("beginarticle", "").replace(" ", "");
         ignoredarticleid = Integer.parseInt(strng);
         ++articleid;
         break;
      }
      soup.append(' ').append(line);
      ++nmlines;
   }
   soupstring = soup.toString();
   fclose_(4, 'w');

   // Read story into "sentence" and "textsentence" arrays.

   fleopen_(4, "story.txt", 'r');
   Storyutils.readstory_(4, articleid);
   fclose_(4, 'r');

   printf_("\nimpresario: articleid= " + articleid + " nmsentences= " +
      nmsentences + " nmtextsentences= " + nmtextsentences);

   if (nmtextsentences < 2) {
      printf_("impresario: articleid= " + articleid +
         " too few text sentences.");
      continue;
   }

   ++tasknm;

   // Enter this task into the gigaspace.

   if (gspace == null) {
      iderr_("impresario: space=null");
   }

   Parsetask newtask = new Parsetask(tasknm, articleid, soupstring,
      nmsentences, nmsenwrds, sentence, nmtextsentences, nmtextsenwrds,
      textsentence);

   gspace.write(newtask);

   printf_("impresario: tasknm= " + tasknm + " articleid= " + articleid +
      " nmlines= " + nmlines + " entered into space.");
} while (fle_ready_(2) && tasknm <= 6); // was NMACTNS
nmtasks = tasknm;

// Wait for a typical task to finish.

parsetime = 300;
try {
   TimeUnit.SECONDS.sleep(parsetime);
} catch (java.lang.InterruptedException ie) {
   System.out.println(ie);
   interrupted = true;
}

/* Now, collect task-results from the gigaspace.  One take is attempted
   per task; a take that yields nothing, or an entry that is not a
   Parseresult, is skipped. */

ResultEntry template = new ResultEntry();
nmcompleted = 0;
for (i1 = 0; i1 < nmtasks; ++i1) {
   ResultEntry result = gspace.take(template, spacetakewaittime);
   if (!(result instanceof Parseresult)) {
      continue;
   }
   Parseresult parsedstory = (Parseresult) result;

   // Load story-level return values.

   tasknm = parsedstory.rettasknm.intValue();

   printf_("impresario: reading tasknm= " + tasknm);

   articleid = parsedstory.retarticleid.intValue();
   storydate = parsedstory.retstorydate.doubleValue();
   source = parsedstory.retsource.toString();
   maincountry = parsedstory.retmaincountry.toString();
   nmtextsentences = parsedstory.retnmtextsentences.intValue();
   dbentitysize = parsedstory.retdbentitysize.intValue();

   // Load story action return values.

   nmstoryactions = parsedstory.retnmstoryactions.intValue();
   for (i = 0; i < nmstoryactions; ++i) {
      storyaction_month[i] = parsedstory.retmonth[i].intValue();
      storyaction_day[i] = parsedstory.retday[i].intValue();
      storyaction_year[i] = parsedstory.retyear[i].intValue();
      storyaction_actmonth[i] = parsedstory.retactmonth[i].intValue();
      storyaction_actday[i] = parsedstory.retactday[i].intValue();
      storyaction_actyear[i] = parsedstory.retactyear[i].intValue();
      storyaction_nmactors[i] = parsedstory.retnmactors[i].intValue();
      storyaction_nmsubjects[i] = parsedstory.retnmsubjects[i].intValue();
      storyaction_nmcountries[i] = parsedstory.retnmcountries[i].intValue();
      storyaction_nmregions[i] = parsedstory.retnmregions[i].intValue();
      storyaction_actrstrng[i] = parsedstory.retactrstrng[i];
      storyaction_sbjctstrng[i] = parsedstory.retsbjctstrng[i];
      storyaction_action[i] = parsedstory.retaction[i];
      storyaction_ctrystrng[i] = parsedstory.retctrystrng[i];
      storyaction_rgnstrng[i] = parsedstory.retrgnstrng[i];
   }

   /* Do not accept a failed extraction due to no date and/or no action.
      Such a result is recognized here by a negative article id. */

   if (articleid < 0) {
      continue;
   }

   /* Write this story's articleid, source, date, and country to the
      database creation file. */

   fprintf_(dbcreateflenm, "articleid " + articleid + " " +
      fdble_(storydate, 6, 2) + " " + source + " " + maincountry +
      "\n" + nmtextsentences);

   /* Now, read sentence component information from the database
      entity array that was populated by an artisan running the
      "parse_a_story_()" method.  Write this information to the
      database creation file. */

   for (i = 0; i < dbentitysize; ++i) {
      dbentity[i] = parsedstory.retdbentity[i].toString();
      fprintf_(dbcreateflenm, dbentity[i]);
   }

   printf_("impresario: articleid= " + articleid +
      " number of story actions= " + nmstoryactions);
   ++nmcompleted;

   /* Read story actions into master action array.  A story with at
      least one action is counted exactly once in "nmparsed". */

   if (nmstoryactions > 0) {
      ++nmparsed;
      for (i = 0; i < nmstoryactions; ++i) {

         /* The master arrays hold NMACTNS entries and fill up across
            stories, so report an overflow instead of indexing past the
            end. */

         if (nmrawactions >= NMACTNS) {
            iderr_("impresario: nmrawactions=NMACTNS");
            break;
         }

         rawaction_actionid[nmrawactions] = nmrawactions + 1;
         rawaction_month[nmrawactions] = storyaction_month[i];
         rawaction_day[nmrawactions] = storyaction_day[i];
         rawaction_year[nmrawactions] = storyaction_year[i];
         rawaction_nmactors[nmrawactions] = storyaction_nmactors[i];
         rawaction_actrstrng[nmrawactions] = storyaction_actrstrng[i];
         rawaction_nmsubjects[nmrawactions] = storyaction_nmsubjects[i];
         rawaction_sbjctstrng[nmrawactions] = storyaction_sbjctstrng[i];
         rawaction_action[nmrawactions] = storyaction_action[i];
         rawaction_nmcountries[nmrawactions] = storyaction_nmcountries[i];
         rawaction_ctrystrng[nmrawactions] = storyaction_ctrystrng[i];
         rawaction_nmregions[nmrawactions] = storyaction_nmregions[i];
         rawaction_rgnstrng[nmrawactions] = storyaction_rgnstrng[i];
         rawaction_actmonth[nmrawactions] = storyaction_actmonth[i];
         rawaction_actday[nmrawactions] = storyaction_actday[i];
         rawaction_actyear[nmrawactions] = storyaction_actyear[i];
         ++nmrawactions;
      }
   }
}

/* NOTE(review): "nmstories" is overwritten here, whereas sequential_()
   increments it per story -- confirm this is intended when more than one
   stories file is processed. */

nmstories = nmcompleted;

/* A worker may have died after taking a task from the space but
   before that worker was able to return the completed task to the
   space.  In this case, write an informative message. */

if (nmcompleted < nmtasks) {
   printf_("impresario: nmcompleted= " + nmcompleted + " nmtasks= " +
      nmtasks);
}

// Terminate clients.

Hooke.terminateClients_(gspace);

// Hand a swallowed interrupt back to the caller.

if (interrupted) {
   Thread.currentThread().interrupt();
}
}

// -----------------------------------------------------------------

static int parse_a_story_(int articleid, String soupstring) {

/* Parse a story.  The "sentence" and "textsentence" arrays have
   been loaded by the calling routine.

   The global variables
   "storydate," "storyday," "storymonth," and "storyyear"
   are updated in this method.

   articleid   id of the story, passed on to the parse workers and to
               Storyaction.storyaction_().
   soupstring  raw story text; its text content is extracted with Jsoup
               and stored in "articletext".

   Returns 1 if the story is rejected because no date or no action was
   found (in which case "nmtextsentences" is set to 0), and 0 otherwise. */

String trialsource = "none", reason = "none";

int i, j, k, maxk = 0, nmwrds;

double yearbase = 2000., sscore, maxscore;

// First, extract the story's text component using the Jsoup method "parse."

articletext = Jsoup.parse(soupstring).text();

// Initialize counters and arrays.

nmcountriesttl = 0;
nmregionsttl = 0;
for (i = 0; i < NMRGNS; ++i) {
   country[i] = "not_found";
   nmrgnmentions[i] = 0;
   nmctrymentions[i] = 0;
}
for (i = 0; i < NMACTNS; ++i) {
   nmactors[i] = 0;
   nmsubjects[i] = 0;
}
for (i = 0; i < MAXNMGRPS; ++i) {
   nmaliases[i] = 0;
   nmofmentions[i] = 0;
}
dbentitysize = 0;

printf_("\nparse_a_story: articleid= " + articleid + " nmsentences= " +
   nmsentences + " nmtextsentences= " + nmtextsentences);

/* Search each sentence for the story's source.

   NOTE(review): "source" is only assigned when a match is found, so a
   story without a recognizable source keeps whatever value "source"
   already held -- confirm that this carry-over is intended. */

for (i = 0; i < nmsentences; ++i) {
   for (j = 0; j < nmsenwrds[i]; ++j) {

      /* Compare a one-word phrase against all "news_organization"
         entries in the "vernacgrp" array.  Then, compare two-word
         phrases, three-word phrases, and finally, four-word phrases. */

      for (nmwrds = 1; nmwrds <= 4; ++nmwrds) {

         // Stop when the phrase would run past the end of the sentence.

         if (j + nmwrds - 1 >= nmsenwrds[i]) {
            break;
         }

         // Words of a phrase are joined with "_".

         trialsource = sentence[i][j];
         for (k = 1; k < nmwrds; ++k) {
            trialsource += "_" + sentence[i][j + k];
         }

         // Find the best-scoring news organization for this phrase.

         maxk = 0;
         maxscore = 0.;
         for (k = 0; k < nmvernacgrps; ++k) {
            if (!grparchetype[k].equals("news_organization")) {
               continue;
            }
            sscore = Textutils.levenshtein_(trialsource, vernacgrp[k]);
            if (sscore > maxscore) {
               maxscore = sscore;
               maxk = k;
            }
         }

         // Accept the best match only when its score exceeds .9.

         if (maxscore > .9) {
            source = vernacgrp[maxk];
         }
      }
   }
}

/* Next, search the story for dates, countries, and regions.  Do this by
   reading fields for dates, groups, actions, and regions simultaneously
   using different threads. */

Thread parseworker[] = new Thread[4];

/* Clear the per-story failure flags before the workers start.
   "noregion" is cleared along with the others so that a value left over
   from an earlier story cannot be counted against this one below. */

nodate = false;
nogroup = false;
noaction = false;
nocountry = false;
noregion = false;
for (i = 0; i < 4; ++i) {
   Parseworker worker = new Parseworker(i + 1, nmsentences, nmtextsentences,
      articleid);
   parseworker[i] = new Thread(worker);
   parseworker[i].start();
}

/* Block until all four workers have finished.  join() lets this thread
   sleep instead of polling.  An interrupt does not cut the wait short;
   it is remembered and restored once every worker is done. */

boolean interrupted = false;
for (i = 0; i < 4; ++i) {
   while (parseworker[i].isAlive()) {
      try {
         parseworker[i].join();
      } catch (InterruptedException ie) {
         interrupted = true;
      }
   }
}
if (interrupted) {
   Thread.currentThread().interrupt();
}

/* Give up if there is no story date or no action. On the other hand,
   repairs can be made if either group or country information is missing. */

if (nodate || noaction) {
   if (nodate && noaction) {
      reason = "no date and no action";

   } else if (nodate) {
      reason = "no date";

   } else {
      reason = "no action";
      // Storyutils.dumpstory_(articleid, reason);
   }
   printf_("parse_a_story: reason= " + reason + " returning");
   nmtextsentences = 0;
   return 1;
}

// Create an action entry from this story.

Storyaction.storyaction_(articleid);

if (noregion) {
   ++nmnoregion;
   reason = "no region";
}

/* Compute the story's date as a fractional year.  The century base is
   2000 unless "storyyear" exceeds 40, in which case it is 1900; the
   day count returned by Textutils.nmdays_() is divided by 365. */

if (storyyear > 40) {
   yearbase = 1900.;
}

storydate = (yearbase + storyyear) +
   Textutils.nmdays_(0, ((int) storyyear), storymonth, storyday) / 365.;
return 0;
}
}
