/*
 * STAN -- Stream Analyser -- http://www.roqe.org/stan
 * Copyright (c) 2001-2004 Konrad Rieck <kr@roqe.org> 
 * All rights reserved.
 * ---
 * $Id: data.c,v 1.22 2003/12/10 20:29:29 kr Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    ``This product includes software developed by Konrad Rieck.''
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 *
 * This file contains the general data io, it splits the input in to blocks
 * and passes the information to the treap and the bit analysis.
 */

#include <sys/types.h>
#include <math.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>

#if HAVE_ALLOCA_H
#include <alloca.h>
#endif

#include <stan.h>
#include <data.h>
#include <treap.h>
#include <bits.h>
#include <bintree.h>
#include <stats.h>
#include <config.h>

extern int errno, verbose, progress, bits;
extern tnode_t **treap;
extern bnode_t *bintree;
size_t size = 0;

void init_data(size_t patlen)
{
   init_treap(patlen);
   init_bintree();
   init_stats();
}

void free_data(size_t patlen)
{
   free_treap(patlen);
   free_bintree();
}

void read_data(const char *path, int patlen, int opt)
{
   FILE *in;
   byte_t buf[BLOCK_SIZE];
   byte_t *pat_buf;
   int d, k, j, n, buf_filled = 0;

   if (strlen(path) > 0) {
      if (!(in = fopen(path, "r")))
	 print_error(path);
   } else {
      in = stdin;
   }

   pat_buf = (byte_t *) alloca(patlen);

   if (verbose)
      printf("Reading stream %s:  ", strlen(path) > 0 ? path : "stdin");

   for (k = BLOCK_SIZE, progress = 0; k > 0; progress++) {
      k = fread(&buf, sizeof(byte_t), BLOCK_SIZE, in);

      if (ferror(in))
	 print_error("fread():");

      for (j = 0; j < k; j++) {

	 for (n = 0; n < patlen - 1; n++)
	    pat_buf[n] = pat_buf[n + 1];
	 pat_buf[n] = buf[j];

	 if (buf_filled < patlen)
	    buf_filled++;

	 for (d = 0; d < patlen; d++) {

	    if (buf_filled > d &&
		(!opt ||
		 (opt == 1 && isprint(buf[j])) ||
		 (opt == 2 && isalpha(buf[j])))) {
	       tinsert(&treap[d], pat_buf + patlen - d - 1, d + 1);

	       if (!d) {
		  if (bits)
		     analyse_byte(pat_buf[patlen - 1]);

		  calc_correlation(pat_buf[patlen - 1]);

		  size++;
	       }

	    }
	 }
      }
      if (verbose)
	 print_progress();
   }

   if (fclose(in))
      print_error(path);

   if (verbose)
      printf("\bDone.\n");
}

size_t size_data()
{
   return size;
}
