merging

79f1845b · birdperson · 1d617988 · 79f1845b · 79f1845b · 79f1845b
Commit 79f1845b authored May 02, 2018 by birdperson
Showing with 4742 additions and 0 deletions
DBscan.c
DensityClustering.c
README.txt
RIVLower.h
RIVPACK1/RIVread.c
RIVPACK1/runscriptUb.sh
RIVPACK1/shittyballs.py
RIVaccessories.h
RIVaccessories.h.gch
RIVcentroids.c
RIVclasses.c
RIVcompare.c
RIVconsolidate.c
RIVcull.c
RIVcullCPUlinux.c
RIVcullDestructive.c
RIVcullGPU.cu
RIVgraphout.c
RIVlexcompare.c
RIVlexicon.h
--- a/DBscan.c
+++ b/DBscan.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+#define RIVSIZE 50000
+#define CACHESIZE 0
+#define EPSILON 0.95
+#define MINPOINTS 20
+#define UNCHECKED 0
+#define NOISE -1
+#define MINSIZE 3000
+
+
+#include "RIVtools.h"
+struct DBnode{ /* one clustering node per vector; DBset (declared on the closing line) is the global node array allocated in main */
+	sparseRIV RIV; /* the vector this node wraps */
+	int* indexes; /* positions in DBset of the nodes whose cosine to this one exceeded EPSILON */
+	int indexCount; /* number of entries currently stored in indexes */
+	int status; /* 0 while unchecked, NOISE, or a positive cluster number */
+}*DBset;
+
+
+void DBdive(int C, int i);
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* DBSCAN driver: loads every sufficiently frequent vector found under argv[1],
+ * links each pair whose cosine exceeds EPSILON, then labels clusters.
+ * returns 1 on bad usage or allocation failure, 0 otherwise */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	char rootString[2000];
+
+	/* reject bad usage before touching the lexicon */
+	if(argc <2){
+		printf("give me a directory");
+		return 1;
+	}
+	/* argv[1] is user input: build "<dir>/" with a bounded write */
+	int written = snprintf(rootString, sizeof rootString, "%s/", argv[1]);
+	if(written < 0 || (size_t)written >= sizeof rootString){
+		printf("directory name is too long");
+		return 1;
+	}
+
+	lexOpen("/home/drbob/Documents/lexicon8-50");
+	/* one-element seed allocation; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+
+	/* never request 0 bytes: what malloc(0) returns is implementation-defined */
+	DBset = malloc((fileCount > 0 ? fileCount : 1)*sizeof(struct DBnode));
+	if(!DBset){
+		printf("memory allocation failed");
+		free(fileRIVs);
+		return 1;
+	}
+	/* wrap every vector in a node with an empty neighbor list */
+	for(int i=0; i<fileCount; i++){
+		fileRIVs[i].magnitude = getMagnitudeSparse(fileRIVs[i]);
+		DBset[i].RIV = fileRIVs[i];
+		/* a single slot, grown by realloc as neighbors are found */
+		DBset[i].indexes = malloc(sizeof(int));
+		DBset[i].indexCount = 0;
+		DBset[i].status = UNCHECKED;
+	}
+	/* the nodes now hold copies of the structs; the array itself is done */
+	free(fileRIVs);
+
+	clock_t beginnsquared = clock();
+	float cosine;
+
+	denseRIV baseDense;
+	baseDense.values = malloc(RIVSIZE*sizeof(int));
+
+	/* compare every unordered pair once, recording the link on both sides */
+	for(int i=0; i<fileCount; i++){
+		memset(baseDense.values, 0, RIVSIZE*sizeof(int));
+		baseDense.values = addS2D(baseDense.values, DBset[i].RIV);
+		baseDense.magnitude = DBset[i].RIV.magnitude;
+
+		for(int j=i+1; j<fileCount; j++){
+			cosine = cosCompare(baseDense, DBset[j].RIV);
+
+			if(cosine>EPSILON){
+				DBset[i].indexes = realloc(DBset[i].indexes, (DBset[i].indexCount+1)*sizeof(int));
+				DBset[i].indexes[DBset[i].indexCount++] = j;
+				DBset[j].indexes = realloc(DBset[j].indexes, (DBset[j].indexCount+1)*sizeof(int));
+				DBset[j].indexes[DBset[j].indexCount++] = i;
+			}
+		}
+	}
+	free(baseDense.values);
+
+	int C = 0;
+	printf("got here\n");
+	for(int i=0; i<fileCount; i++){
+		if(DBset[i].status) continue;
+		if(DBset[i].indexCount <MINPOINTS){
+			DBset[i].status = NOISE;
+			/* a noise point must not open a cluster of its own; without this
+			 * continue the NOISE label was overwritten immediately below */
+			continue;
+		}
+		C++;
+		DBset[i].status = C;
+		DBdive(C, i);
+	}
+
+
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("\nnsquared time:%lf\n\n", time);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+
+
+return 0;
+}
+/* grows cluster C outward from DBset[i], breadth first.
+ * every reachable node that is not yet in a cluster is labelled C and printed;
+ * only nodes with at least MINPOINTS neighbors are queued for further spreading.
+ * C: cluster number to assign;  i: position in DBset of the cluster root */
+void DBdive(int C, int i){
+	printf("root: %s\n", DBset[i].RIV.name);
+	/* the work queue holds positions in DBset rather than copies of whole nodes */
+	int capacity = 16;
+	int *queue = malloc(capacity*sizeof(int));
+	if(!queue){
+		printf("memory allocation failed");
+		return;
+	}
+	queue[0] = i;
+	int nodeCount = 1;
+	for(int j=0; j<nodeCount; j++){
+		struct DBnode *current = &DBset[queue[j]];
+		for(int k=0; k<current->indexCount; k++){
+			int index = current->indexes[k];
+			/* already claimed by a cluster (NOISE and unchecked are still claimable) */
+			if(DBset[index].status>0) continue;
+			printf(">>%s\n", DBset[index].RIV.name);
+			DBset[index].status = C;
+			/* same threshold main uses to decide a node is not noise, so a node
+			 * spreads identically whether it is met as a root or as a neighbor */
+			if(DBset[index].indexCount >= MINPOINTS){
+				if(nodeCount == capacity){
+					/* double the queue instead of reallocating on every push */
+					int *grown = realloc(queue, 2*capacity*sizeof(int));
+					if(!grown){
+						printf("memory allocation failed");
+						free(queue);
+						return;
+					}
+					queue = grown;
+					capacity *= 2;
+				}
+				queue[nodeCount++] = index;
+			}
+		}
+	}
+	free(queue);
+}
+
+/* walks rootString recursively and appends one normalized sparse vector to
+ * *fileRIVs for every entry whose lexicon frequency exceeds MINSIZE.
+ * rootString: directory path ending in '/'
+ * fileRIVs, fileCount: growing array and its length, updated in place */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+		/* skip hidden entries along with "." and ".." */
+		if(*(files->d_name) == '.') continue;
+
+		if(files->d_type == DT_DIR){
+			/* bounded path build; an over-long path is skipped, not truncated */
+			int written = snprintf(pathString, sizeof pathString, "%s%s/", rootString, files->d_name);
+			if(written > 0 && (size_t)written < sizeof pathString){
+				directoryToL2s(pathString, fileRIVs, fileCount);
+			}
+			/* a directory is not a lexicon entry: do not fall through to lexPull */
+			continue;
+		}
+		denseRIV temp = lexPull(files->d_name);
+		if(*temp.frequency >MINSIZE){
+			sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+			if(!grown){
+				/* keep what was collected so far and stop scanning */
+				printf("memory allocation failed");
+				free(temp.values);
+				break;
+			}
+			(*fileRIVs) = grown;
+
+			(*fileRIVs)[(*fileCount)] = normalize(temp, 500);
+
+			strcpy((*fileRIVs)[(*fileCount)].name, files->d_name);
+			(*fileCount)++;
+		}
+		free(temp.values);
+	}
+	/* release the directory handle; recursion would otherwise leak one per level */
+	closedir(directory);
+}
+
+
--- a/DensityClustering.c
+++ b/DensityClustering.c
+/* this DB scan algorithm is not meant to be an example of an easily written 
+ * program. rather it is a useful tool that can be used to validate the contents
+ * of a lexicon.  it will identify, using a density based algorithm
+ * clusters of vectors.  if the lexicon is well formed, these clusters should
+ * be numerous, as well as containing well related words */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+//RIVSIZE macro must be set to the size of the RIVs in the lexicon
+#define RIVSIZE 25000
+#define CACHESIZE 0
+#define EPSILON 0.98
+#define MINPOINTS 1
+#define UNCHECKED 0
+#define NOISE -1
+#define MINSIZE 10000
+
+
+#include "RIVtools.h"
+
+/* the node holds a vector, and metadata:
+ * -neighbors will hold pointers to the nodes that are its neighbors
+ * -neighborCount will hold the number of neighbors
+ * -status will hold its cluster: a cluster number, NOISE, or UNCHECKED
+ */
+struct DBnode{
+	sparseRIV RIV; /* the vector this node wraps */
+	struct DBnode** neighbors; /* pointers to nodes whose cosine to this one exceeded EPSILON; grown with realloc in intercompare */
+	int neighborCount; /* number of pointers stored in neighbors */
+	int status; /* UNCHECKED, NOISE, or a positive cluster number */
+};
+
+void intercompare(struct DBnode* DBset, int nodeCount);
+void DBdive(struct DBnode* root, struct DBnode *DBset, int C);
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* density clustering driver: opens the lexicon at argv[1], loads every vector
+ * seen in more than MINSIZE contexts, links close pairs and prints the clusters.
+ * returns 1 on bad usage or allocation failure, 0 otherwise */
+int main(int argc, char *argv[]){
+	if(argc <2){
+		printf("give me a directory");
+		return 1;
+	}
+	int fileCount = 0;
+	char rootString[1000];
+
+	/* argv[1] is user input: build "<dir>/" with a bounded write */
+	int written = snprintf(rootString, sizeof rootString, "%s/", argv[1]);
+	if(written < 0 || (size_t)written >= sizeof rootString){
+		printf("directory name is too long");
+		return 1;
+	}
+
+	/* one-element seed allocation; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	lexOpen(argv[1]);
+
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	if(fileCount == 0){
+		/* nothing to cluster */
+		free(fileRIVs);
+		return 0;
+	}
+	/* an array of nodes, one for each vector. it lives on the heap: a
+	 * variable-length array of this size can exceed the stack for a big lexicon */
+	struct DBnode *DBset = malloc(fileCount*sizeof(struct DBnode));
+	if(!DBset){
+		printf("memory allocation failed");
+		free(fileRIVs);
+		return 1;
+	}
+
+	/* fill the node array with vectors and initialize metadata */
+	for(int i = 0; i < fileCount; i++){
+		fileRIVs[i].magnitude = getMagnitudeSparse(fileRIVs[i]);
+		DBset[i].RIV = fileRIVs[i];
+		/* a single malloc for later realloc'ing */
+		DBset[i].neighbors = malloc(sizeof(struct DBnode*));
+		DBset[i].neighborCount = 0;
+		DBset[i].status = UNCHECKED;
+	}
+	/* fileRIVs was only temporary */
+	free(fileRIVs);
+
+	intercompare(DBset, fileCount);
+
+	int C = 0;
+
+	for(int i=0; i<fileCount; i++){
+		/* skip anything already labelled as noise or as part of a cluster */
+		if(DBset[i].status) continue;
+		if(DBset[i].neighborCount <MINPOINTS){
+			DBset[i].status = NOISE;
+			continue;
+		}
+		C++;
+		printf("\ncluster %d\n", C);
+		DBset[i].status = C;
+		printf("root: %s, %d, %lf\n", DBset[i].RIV.name, DBset[i].RIV.frequency, DBset[i].RIV.magnitude);
+		DBdive(&DBset[i], DBset, C);
+	}
+
+	/* release the neighbor lists and the node array */
+	for(int i=0; i<fileCount; i++){
+		free(DBset[i].neighbors);
+	}
+	free(DBset);
+
+return 0;
+}
+/* recursively claims for cluster C every node reachable from root.
+ * root: node whose neighbors are examined
+ * DBset: the node array (not read here; kept for the existing signature)
+ * C: cluster number to assign
+ * NOTE(review): recursion depth grows with the length of the neighbor chain */
+void DBdive(struct DBnode* root, struct DBnode *DBset, int C){
+
+	for(int i = 0; i < root->neighborCount; i++){
+		/* for easier coding, put it in a local variable */
+		struct DBnode *branch = root->neighbors[i];
+
+		/* skip nodes already claimed by a cluster; NOISE and UNCHECKED pass */
+		if(branch->status > 0){
+			continue;
+		}
+
+		printf(">>%s, %d, %lf\n", branch->RIV.name, branch->RIV.frequency, branch->RIV.magnitude);
+
+		/* include this in the cluster C */
+		branch->status = C;
+		/* spread from this branch under the same threshold main uses to accept
+		 * a root (neighborCount >= MINPOINTS), so the result does not depend on
+		 * whether a node is first met as a root or as a neighbor */
+		if(branch->neighborCount >= MINPOINTS){
+			/* recursive dive into next branch */
+			DBdive(branch, DBset, C);
+		}
+	}
+}
+/* fileRIVs and fileCount are accessed as pointers, so that we can find them changed outside this function
+ */
+/* scans one lexicon directory (no recursion) and appends a normalized sparse
+ * vector to *fileRIVs for every entry whose contextSize exceeds MINSIZE.
+ * rootString: directory to scan
+ * fileRIVs, fileCount: growing array and its length, updated in place */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+
+		if(files->d_type == DT_DIR){
+			/* the lexicon should not have valid sub-directories */
+			continue;
+		}
+		/* the name is copied into the vector's fixed-size name field below;
+		 * an entry that cannot fit is skipped rather than overflowing it */
+		if(strlen(files->d_name) >= sizeof(((sparseRIV*)0)->name)) continue;
+
+		denseRIV* temp = lexPull(files->d_name);
+		if(!temp) continue;
+		/* if the vector has been encountered more than MINSIZE times
+		 * then it should be statistically significant, and useful */
+		if(temp->contextSize >MINSIZE){
+			sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+			if(!grown){
+				/* keep what was collected so far and stop scanning */
+				printf("memory allocation failed");
+				free(temp);
+				break;
+			}
+			(*fileRIVs) = grown;
+			(*fileRIVs)[(*fileCount)] = normalize(*temp, 500);
+			(*fileRIVs)[(*fileCount)].magnitude = temp->magnitude;
+			strcpy((*fileRIVs)[(*fileCount)].name, files->d_name);
+			(*fileCount)++;
+		}
+		free(temp);
+	}
+	/* release the directory handle opened above */
+	closedir(directory);
+}
+
+/* compares every unordered pair of nodes once and, where the cosine exceeds
+ * EPSILON, records each node in the other's neighbor list.
+ * DBset: node array;  nodeCount: number of nodes in it */
+void intercompare(struct DBnode* DBset, int nodeCount){
+	denseRIV baseDense;
+	for(int i=0; i<nodeCount; i++){
+		struct DBnode *left = &DBset[i];
+
+		/* expand the left-hand vector into a zeroed dense buffer */
+		memset(baseDense.values, 0, RIVSIZE*sizeof(int));
+		addS2D(baseDense.values, left->RIV);
+		baseDense.magnitude = left->RIV.magnitude;
+
+		/* only later nodes: earlier pairings were recorded from the other side */
+		for(int j=i+1; j<nodeCount; j++){
+			struct DBnode *right = &DBset[j];
+			double cosine = cosCompare(baseDense, right->RIV);
+
+			/* not close enough to count as neighbors */
+			if(!(cosine>EPSILON)) continue;
+
+			/* make room for one more pointer on each side, then link both ways */
+			left->neighbors = realloc(left->neighbors, (left->neighborCount+1)*sizeof(struct DBnode*));
+			right->neighbors = realloc(right->neighbors, (right->neighborCount+1)*sizeof(struct DBnode*));
+
+			left->neighbors[left->neighborCount++] = right;
+			right->neighbors[right->neighborCount++] = left;
+		}
+	}
+}
--- a/README.txt
+++ b/README.txt
+/* RIV stands for Random Index Vector, referring to the method of generating
+ * the basic vectors that correspond to each word.  each word has an algorithmically
+ * generated vector which represents it in this mathematical model, such that a word
+ * will produce the same vector each time it is encountered*[1]. this base
+ * vector will be referred to as a L1 vector or a barcode vector
+ * 
+ * by summing these vectors, we can get a mathematical representation of
+ * a set of text.  this summed vector will be referred to as an L2 vector
+ * or aggregate vector.  in its simplest implementation, an L2 vector
+ * representation of a document contains a model of the contents of the 
+ * document, enabling us to compare direction and magnitude of document 
+ * vectors to understand their relationships to each other.
+ * 
+ * but the system we are really interested in is the ability to form 
+ * context vectors
+ * a context vector is the sum of all (L1?) vectors that the word
+ * has been encountered in context with. from these context vectors
+ * certain patterns and relationships between words should emerge. 
+ * what patterns? that is the key question we will try to answer
+ * 
+ * [1] a word produces the same vector each time it is encountered only 
+ * if the environment is the same, i.e. the RIVs have the same dimensionality
+ * and the same nonzero count.  comparing vectors produced in different 
+ * environments yields meaningless drivel and should be avoided
+ * 
+ * [2] what exactly "context" means remains a major stumbling point.
+ * paragraphs?  sentences?  some potential analyses would expect a static
+ * sized context (the nearest 10 words?) in order to be sensible, but 
+ * it may be that some other definition of context is the most valid for
+ * this model.  we will have to find out.
+ * 
+ * some notes:
+ * 
+ * -sparseRIV vs. denseRIV (sparse vector vs. dense vector)
+ * the two primary data structures we will use to analyze these vectors
+ * each vector type is packed with some metadata 
+ * (name, magnitude, frequency, flags)
+ * 
+ * 	-denseRIV is a standard vector representation.  
+ * each array index corresponds to a dimension
+ * each value corresponds to a measurement in that dimension
+ * 
+ * 	-sparseRIV is vector representation optimized for largely empty vectors
+ * each data point is a location/value pair where the
+ * location represents array index 
+ * value represents value in that array index
+ * 
+ * if we have a sparsely populated dense vector (mostly 0s) such as:
+ * 
+ * |0|0|5|0|0|0|0|0|4|0|
+ * 
+ * there are only 2 values in a ten element array. this could, instead
+ * be represented as
+ * 
+ * |2|8| array indexes
+ * |5|4| array values
+ * |2|   record of size
+ * 
+ * and so, a 10 element vector has been represented in only 5 integers
+ * 
+ * this is important for memory use, of course, but also for rapid calculations
+ * if we have two vectors
+ * 
+ * |0|0|5|0|0|0|0|0|4|0|
+ * |0|0|0|0|0|0|7|0|3|-2|
+ * and we wish to perform the dot product this will take 10 steps,
+ * 9 of which are either 0*0 = 0, or 0*x = 0
+ * if we instead have these represented as sparse vectors
+ * |2|8| 
+ * |5|4| 
+ * |2|  
+ * 
+ * |6|8|9|
+ * |7|3|-2|
+ * |3|
+ * 
+ * we only need to search for matching location values 
+ * or, better yet, if we use a hybrid analysis:
+ * |0|0|5|0|0|0|0|0|4|0|
+ *   ___________/__/_/ 
+ *  / / /
+ * |6|8|9|
+ * |7|3|-2|
+ * |3|
+ * we can simply access the dense vector by indexes held in the sparse vector
+ * reducing this operation to only 3 steps
+ */
+
--- a/RIVLower.h
+++ b/RIVLower.h
+#ifndef RIVLOWER_H_
+#define RIVLOWER_H_
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include "RIVaccessories.h"
+#include "assert.h"
+/* RIVSIZE macro defines the dimensionality of the RIVs we will use
+ * 25000 is the standard, but can be redefined specifically
+ */
+#ifndef RIVSIZE
+#define RIVSIZE 25000
+#endif
+
+#if RIVSIZE<4
+#error "RIVSIZE must be a positive number, greater than 4 (preferably a large positive)"
+#endif
+
+/* NONZeros macro defines the number of non-zero values that will be generated
+ * for any level one (barcode) RIV.  2 is simple and lightweight to begin
+ */
+#ifndef NONZEROS
+#define NONZEROS 2
+#endif
+
+#if NONZEROS%2 || NONZEROS<1
+#error "NONZEROS must be an even, greater than 0 number"
+#endif
+
+
+/* CACHESIZE macro defines the number of RIVs the system will cache.
+ * a larger cache means more memory consumption, but will also be significantly
+ * faster in aggregation and reading applications. doesn't affect systems
+ * that do not use lexpull/push
+ */
+#ifndef CACHESIZE
+#define CACHESIZE 10000
+#endif
+
+#if CACHESIZE<0
+#error "CACHESIZE cannot be a negative number"
+#endif
+
+/* the size of the tempBlock used in consolidation and implicit RIVs */
+#define TEMPSIZE 3*RIVSIZE
+
+
+/* the sparseRIV is a RIV form optimized for RIVs that will be mostly 0s
+ * as this is often an ideal case, it is adviseable as the default 
+ * unless we are doing long term RIV aggregation.
+ * specifically, a sparseRIV contains a pair of arrays, 
+ * containing locations and values, where pairs are found in like array 
+ * indices.
+ */
+ 
+typedef struct{
+	char name[100]; /* label of the vector; not filled in by consolidateD2S */
+	int *values; /* value of each stored entry; consolidateD2S points this into the same allocation as locations */
+	int *locations; /* dense array index of each stored entry, paired with values by position */
+	size_t count; /* number of location/value pairs */
+	int frequency; /* metadata; NOTE(review): set by callers, meaning not shown in this header */
+	int contextSize; /* metadata; NOTE(review): set by callers, meaning not shown in this header */
+	float magnitude; /* metadata; not computed by consolidateD2S */
+}sparseRIV;
+/* the denseRIV is a RIV form optimized for overwhelmingly non-0 vectors
+ * this is rarely the case, but its primary use is for performing vector
+ * math, as comparisons and arithmetic between vectors are ideally 
+ * performed between sparse and dense (hetero-arithmetic)
+ */
+typedef struct{
+	char name[100]; /* the word this vector belongs to; subtractThisWord seeds rand() from it */
+	void* cached; /* NOTE(review): presumably cache bookkeeping; not used in this header — confirm */
+	int frequency; /* metadata; not modified in this header */
+	int contextSize; /* decremented by subtractThisWord */
+	float magnitude; /* metadata; not computed in this header */
+	int values[RIVSIZE]; /* one slot per dimension */
+}denseRIV;
+
+/*RIVKey, holds global variables used under the hood, primarily for the lexicon
+ * it also holds a "temp block" that will be used by the dense to sparse 
+ * conversion and implicit RIV aggregation 
+*/
+struct RIVData{
+	int h_tempBlock[TEMPSIZE]; /* scratch space; consolidateD2S stages locations and values in its upper two thirds */
+	int tempSize; /* NOTE(review): not used in the visible code — confirm purpose */
+	char lexName[255]; /* NOTE(review): presumably the open lexicon's name — confirm */
+	denseRIV** RIVCache; /* NOTE(review): presumably the cache sized by CACHESIZE — confirm */
+	char flags; /* NOTE(review): not used in the visible code — confirm purpose */
+}static RIVKey;
+
+/*consolidateD2S takes a denseRIV value-set input, and returns a sparse RIV with
+ * all 0s removed. it does not automatically carry metadata, which must be assigned
+ * to a denseRIV after the fact.  often denseRIVs are only temporary, and don't
+ * contain any metadata
+ */
+sparseRIV consolidateD2S(int *denseInput);  //#TODO fix int*/denseRIV confusion
+
+/* makeSparseLocations must be called repeatedly in the processing of a 
+ * file to produce a series of locations from the words of the file
+ * this produces an "implicit" RIV which can be used with the mapI2D function
+ * to create a denseRIV.
+ */
+void makeSparseLocations(char* word,  int *seeds, int seedCount);
+
+/* mapI2D maps an "implicit RIV" that is, an array of index values, 
+ * arranged by chronological order of generation (as per makesparseLocations)
+ * it assigns, in the process of mapping, values according to ordering
+ */
+int* mapI2D(int *locations, int seedCount);
+
+/* highly optimized method for adding vectors.  there is no method 
+ * included for adding D2D or S2S, as this system is faster-enough
+ * to be more than worth using
+ */
+int* addS2D(int* destination, sparseRIV input);
+
+/* cacheDump flushes the RIV cache out to relevant files, backing up all 
+ * data.  this is called by the lexClose and signalSecure functions
+ */
+int cacheDump();
+
+/* adds all elements of an implicit RIV (a sparseRIV represented without values)
+ * to a denseRIV.  used by the file2L2 functions in aggregating a document vector
+ */
+int* addI2D(int* destination, int* locations, int seedCount);
+
+/*subtracts a words vector from its own context.  regularly used in lex building
+ */
+void subtractThisWord(denseRIV* vector);
+/* begin definitions */
+
+/* adds a sparse vector into a dense value array, in place.
+ * destination: dense array, indexed by the locations held in input
+ * input: sparse vector supplying count location/value pairs
+ * returns destination
+ * #TODO fix destination parameter vs calloc of destination */
+int* addS2D(int* destination, sparseRIV input){
+	/* each pair contributes its value to the dense slot named by its location */
+	for(size_t pair = 0; pair < input.count; pair++){
+		destination[input.locations[pair]] += input.values[pair];
+	}
+	return destination;
+}
+
+/* builds a freshly calloc'ed dense array of RIVSIZE ints from an implicit RIV.
+ * locations: index values taken two at a time; the first of each pair gets +1,
+ * the second gets -1
+ * valueCount: number of entries in locations
+ * returns the new array
+ * #TODO fix destination parameter vs calloc of destination */
+int* mapI2D(int *locations, int valueCount){
+	int *destination = (int*)calloc(RIVSIZE,sizeof(int));
+
+	for(int first = 0; first < valueCount; first += 2){
+		destination[locations[first]] += 1;
+		destination[locations[first+1]] -= 1;
+	}
+
+	return destination;
+}
+/* adds an implicit RIV into an existing dense array, in place.
+ * destination: dense array to modify
+ * locations: index values taken two at a time; the first of each pair gets +1,
+ * the second gets -1
+ * valueCount: number of entries in locations
+ * returns destination
+ * #TODO fix destination parameter vs calloc of destination */
+int* addI2D(int* destination, int *locations, int valueCount){
+	int position = 0;
+
+	while(position < valueCount){
+		int plusAt = locations[position++];
+		destination[plusAt] += 1;
+		int minusAt = locations[position++];
+		destination[minusAt] -= 1;
+	}
+
+	return destination;
+}
+
+
+
+/* converts a dense value array of RIVSIZE ints into a sparseRIV holding only
+ * the non-zero entries.
+ * locations and values share one allocation (values starts count ints after
+ * locations), so freeing output.locations releases both.
+ * only count, locations and values are set; the other fields are left for
+ * the caller to fill in.
+ * on allocation failure the result has count 0 and NULL locations/values */
+sparseRIV consolidateD2S(int *denseInput){
+	sparseRIV output;
+	output.count = 0;
+	/* key/value pairs are staged in the worst-case sized temporary block */
+	int* locations = RIVKey.h_tempBlock+RIVSIZE;
+	int* values = locations+RIVSIZE;
+	for(int i=0; i<RIVSIZE; i++){
+
+		/* act only on non-zeros */
+		if(denseInput[i]){
+			locations[output.count] = i;
+			values[output.count] = denseInput[i];
+
+			/* track size of forming sparseRIV */
+			output.count++;
+		}
+	}
+	/* open a slot for the pairs. an all-zero input still gets one slot, so a
+	 * NULL from malloc(0) is never mistaken for an allocation failure */
+	size_t slots = output.count ? output.count : 1;
+	output.locations = (int*) malloc(slots*2*sizeof(int));
+	if(!output.locations){
+		printf("memory allocation failed"); //*TODO enable fail point knowledge and security
+		/* hand back an empty vector instead of copying into NULL */
+		output.values = NULL;
+		output.count = 0;
+		return output;
+	}
+	output.values = output.locations + output.count;
+
+	/* copy the staged pairs into the opened slot */
+	memcpy(output.locations, locations, output.count*sizeof(int));
+	memcpy(output.values, values, output.count*sizeof(int));
+
+	return output;
+}
+
+
+
+/* writes the NONZEROS pseudo-random locations for word into locations,
+ * starting at offset count.
+ * rand() is seeded from the word, so a given word always yields the same
+ * sequence for the same RIVSIZE and NONZEROS */
+void makeSparseLocations(char* word,  int *locations, int count){
+	int *slot = locations + count;
+	srand(wordtoSeed(word));
+	/* NONZEROS is even (enforced by the preprocessor check above) */
+	for(int drawn = 0; drawn < NONZEROS; drawn++){
+		slot[drawn] = rand()%RIVSIZE;
+	}
+}
+
+/* returns a heap-allocated sparseRIV with every byte zeroed (calloc), or
+ * whatever calloc returns on failure */
+sparseRIV* sparseAllocateFormatted(){
+	return (sparseRIV*)calloc(1, sizeof(sparseRIV));
+}
+/* removes a word's own base vector from its context vector.
+ * rand() is re-seeded from vector->name and the NONZEROS locations are drawn
+ * again; the first of each pair is decremented and the second incremented,
+ * the inverse of the +1/-1 pattern applied by mapI2D and addI2D.
+ * contextSize is reduced by one */
+void subtractThisWord(denseRIV* vector){
+	srand(wordtoSeed(vector->name));
+
+	/* NONZEROS is always even, so the locations come in whole pairs */
+	for(int pairsLeft = NONZEROS/2; pairsLeft > 0; pairsLeft--){
+		int minusAt = rand()%RIVSIZE;
+		int plusAt = rand()%RIVSIZE;
+		vector->values[minusAt] -= 1;
+		vector->values[plusAt] += 1;
+	}
+
+	/* record a context size 1 smaller */
+	vector->contextSize -= 1;
+}
+
+#endif
+
--- a/RIVPACK1/RIVread.c
+++ b/RIVPACK1/RIVread.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <error.h>
+#define RIVSIZE 200000
+#define NONZEROS 2
+#define CACHESIZE 1000
+#include "../RIVtools.h"
+
+
+//this program reads a directory full of files, and adds all context vectors (considering file as context)
+//to all words found in these files. this is used to create a lexicon, or add to an existing one
+
+void fileGrind(FILE* textFile);
+void addContext(denseRIV* lexRIV, sparseRIV context);
+void directoryGrind(char *rootString);
+
+
+/* lexicon builder: adds the context of every .txt file under argv[1] to the
+ * lexicon "lexicon200-2". returns 1 on bad usage or a missing directory */
+int main(int argc, char *argv[]){
+	char pathString[1000];
+
+	//argv[1] is required; reading it unchecked crashes when it is absent
+	if(argc < 2){
+		printf("give me a directory");
+		return 1;
+	}
+
+	//we format the root directory, preparing to scan its contents
+	//(bounded write: argv[1] is user input and may exceed the buffer)
+	int written = snprintf(pathString, sizeof pathString, "%s/", argv[1]);
+	if(written < 0 || (size_t)written >= sizeof pathString){
+		printf("directory name is too long");
+		return 1;
+	}
+
+	//ensure that the targeted root directory exists
+	struct stat st;
+	if(stat(pathString, &st) == -1) {
+		printf("directory doesn't seem to exist");
+		return 1;
+	}
+
+	//we open the lexicon, if it does not yet exist, it will be created
+	//(opened only once the input is known to be usable, so no early return
+	//leaves it open)
+	lexOpen("lexicon200-2");
+
+	//we will scan the directory, adding all data to our lexicon, as seen inside
+	directoryGrind(pathString);
+
+	//we close the lexicon again, ensuring all data is secured
+	lexClose();
+	return 0;
+}
+
+//mostly a standard recursive Dirent-walk
+//recursive directory walk: every .txt file found under rootString is handed
+//to fileGrind. rootString must end in '/'
+void directoryGrind(char *rootString){
+	/* *** begin Dirent walk *** */
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+
+		if(!files->d_name[0]) break;
+		//skip hidden entries, "." and "..". going back through the loop
+		//condition re-checks readdir for NULL, which the old inner loop did not
+		if(*(files->d_name)=='.') continue;
+
+		if(files->d_type == DT_DIR){
+			//bounded path build; an over-long path is skipped, not truncated
+			int written = snprintf(pathString, sizeof pathString, "%s%s/", rootString, files->d_name);
+			if(written > 0 && (size_t)written < sizeof pathString){
+				directoryGrind(pathString);
+			}
+			continue;
+		}
+
+		int written = snprintf(pathString, sizeof pathString, "%s%s", rootString, files->d_name);
+		if(written < 0 || (size_t)written >= sizeof pathString){
+			printf("skipped: %s\n", files->d_name);
+			continue;
+		}
+		printf("%s\n", pathString);
+/* *** end dirent walk, begin meat of function  *** */
+
+		//check for non-txt files; a path shorter than the suffix cannot match
+		size_t pathLength = strlen(pathString);
+		if(pathLength < 4 || strcmp(pathString+pathLength-4, ".txt")){
+			printf("skipped: %s\n", files->d_name);
+			continue;
+		}
+
+		//open a file within root directory
+		FILE *input = fopen(pathString, "r");
+		if(input){
+			//process this file and add it's data to lexicon
+			fileGrind(input);
+
+			fclose(input);
+		}
+	}
+	//release the directory handle; recursion would otherwise leak one per level
+	closedir(directory);
+}
+
+//form context vector from contents of file, then add that vector to
+//all lexicon entries of the words contained
+//form context vector from contents of file, then add that vector to
+//all lexicon entries of the words contained
+//NOTE(review): the word loop reads from wherever fileToL2Clean leaves the
+//stream position; confirm that it rewinds textFile
+void fileGrind(FILE* textFile){
+	//form a context vector.  "clean" indicates that it will ignore any word which
+	//contains unwanted characters
+	sparseRIV contextVector = fileToL2Clean(textFile);
+
+	//the lexicon entry of the word currently being processed
+	denseRIV* lexiconRIV;
+
+	char word[100] = {0};
+	//loop on a successful conversion instead of testing feof after the read:
+	//a final word with no whitespace after it sets EOF while still being read
+	//successfully, and the feof test used to throw it away
+	while(fscanf(textFile, "%99s", word) == 1){
+		//we ensure that each word exists, and is free of unwanted characters
+		if(!(*word))continue;
+
+		if(!isWordClean((char*)word)){
+			continue;
+		}
+
+		//we pull the vector corresponding to each word from the lexicon
+		//if it's a new word, lexPull returns a 0 vector
+		lexiconRIV= lexPull(word);
+
+		//we add the context of this file to this wordVector
+		addContext(lexiconRIV, contextVector);
+
+		//we remove the sub-vector corresponding to the word itself
+		subtractThisWord(lexiconRIV);
+
+		//we log that this word has been encountered one more time
+		lexiconRIV->frequency += 1;
+
+		//and finally we push it back to the lexicon for permanent storage
+		lexPush(lexiconRIV);
+
+	}
+	//releases the context vector's locations buffer
+	free(contextVector.locations);
+}
+
+//folds one context vector into a lexicon entry
+void addContext(denseRIV* lexRIV, sparseRIV context){
+	//track how much context has been accumulated; this is not directly
+	//necessary, but is useful metadata for some analyses
+	lexRIV->contextSize += context.contextSize;
+
+	//sparse-into-dense addition of the context values
+	addS2D(lexRIV->values, context);
+}
+
+
+	
--- a/RIVPACK1/runscriptUb.sh
+++ b/RIVPACK1/runscriptUb.sh
+# clean: walks its arguments; directories are descended into, and every other
+# path is cleaned by the python script and then fed to the seven readers.
+# an empty argument ends the walk.
+clean(){
+	while [ -n "$1" ]; do
+		if [ ! -d "$1" ]; then
+			python shittyballs.py "$1"
+
+			# run each reader over the freshly cleaned output
+			for reader in 1 2 3 4 5 6 7; do
+				./RIVread"$reader" cleanbooks/
+			done
+
+			rm  -r cleanbooks/
+			#rm "$1"
+		else
+			clean "$1"/*
+		fi
+		shift
+	done
+}
+
+
+
+clean ../../books/*
--- a/RIVPACK1/shittyballs.py
+++ b/RIVPACK1/shittyballs.py
+#import requests
+import re
+import string
+import os
+import sys
+from subprocess import call
+import nltk
+from nltk.corpus import wordnet as wn
+import pdb
+from nltk.stem import PorterStemmer
+
+
+def writeWord(cleanString, word, stem, blacklist):
+    if word == stem:
+        FILE = open("lexicon/" + word, "w")
+        FILE.write("1");
+        FILE.close();
+        return (cleanString + " " + word)
+        
+    elif stem not in blacklist:
+        if len(stem) > 2:
+            FILE = open("lexicon/" + word, "w")
+            FILE.write("2"+stem);
+            FILE.close();
+            FILE = open("lexicon/" + stem, "w")
+            FILE.write("1")
+            FILE.close();
+            return (cleanString + " " + stem)
+
+    return cleanString
+	
+
+def liFix(word):
+    if not word[len(word)-2:] == "li":
+        return word
+    
+    temp = ps.stem(word[:-2])
+    if temp:
+        return temp
+    return word
+
+def cleanWord(word):
+    word = word.lower();
+    regex = re.compile('[^a-z]+')
+    word = regex.sub('', word)
+    #print(word)
+    return word
+
+
+def fileCheck(word):
+
+    try:
+        
+        wordFile = open("lexicon/{}".format(word), "r")
+        code = int(wordFile.read(1))
+    except:
+        return 0
+
+    if code == 2:
+        word = wordFile.read()
+        #print("file flipped to: " + word)
+        wordFile.close()
+        return word
+    elif code == 1:
+        #print("file accepted: " + word)
+        wordFile.close()
+        return word
+    elif code == 0:
+        wordFile.close()
+        return -1
+
+def morphyTest(word):
+    morphyTemp = wn.morphy(word)
+
+    if not morphyTemp:
+        return 0
+
+    return morphyTemp;
+
+
+#begin mainfunction
+
+blacklist = ["a", "an", "the", "so", "as", "how",
+             "i", "me", "we", "they", "you", "it", "he", "she",
+             "but", "have", "had",
+             "for", "by", "in", "out", "as", "not"
+             "be", "were", "was", "am", "are", "is",
+             "mr", "mrs", "mr", "and"]
+word = {}
+ps = PorterStemmer()
+sourceString = sys.argv[1]
+cutDirectories = sourceString.split('/')[-1]
+pathString = cutDirectories.split('.')[0]
+pathString = "cleanbooks/" + pathString + "clean/"
+print(sourceString + "\n")
+
+if not os.path.exists('cleanbooks'):
+    os.makedirs('cleanbooks')
+if not os.path.exists('lexicon'):
+    os.makedirs('lexicon')
+
+if not os.path.exists(pathString):
+    os.makedirs(pathString)
+
+call(["python", "blacklist.py"])
+i=0
+skip = 1
+with open(sourceString, 'U') as fileIn:
+
+    text = fileIn.read()
+
+    for paragraph in text.split(2*os.linesep):
+
+        if not paragraph:
+            continue
+        elif "*** START OF " in paragraph or "*END THE SMALL PRINT" in paragraph:
+            skip = 0
+            continue
+        elif "*** END OF " in paragraph:
+            fileIn.close()
+            sys.exit()
+        elif "End of Project Gutenberg's" in paragraph:
+            fileIn.close()
+            sys.exit()
+        elif "End of the Project Gutenberg" in paragraph:
+            fileIn.close()
+            sys.exit()
+        if not skip:
+            cleanString = ''
+            i += 1
+            fileOut = open("{}{}.txt".format(pathString, i), "w")
+            for line in paragraph.split(os.linesep):
+
+                for tempWord in line.split():
+                    word=cleanWord(tempWord)
+                    if not word:
+                        continue
+                    if len(word) < 3:
+						continue;
+                    if word in blacklist:
+						continue;
+
+
+                    temp = fileCheck(word)
+                    if temp == -1:
+                        continue
+                    if temp:
+                        cleanString = (cleanString + " " + temp);
+                        continue
+						
+                    else:
+                        morphy = morphyTest(word)
+                        if morphy:
+                            stem = ps.stem(morphy)
+                            if stem:
+				stem = liFix(stem)
+                                cleanString = writeWord(cleanString, word, stem, blacklist)
+
+                cleanString = cleanString + os.linesep
+            if len(cleanString.split(' ')) > 2:
+                
+                fileOut.write(cleanString)
+                fileOut.close()
+            else:
+                
+                fileOut.close()
+                os.remove("{}{}.txt".format(pathString, i))
+                i -= 1
+
+
+if skip==1:
+    print(sourceString + " was badly parsed, no output");
--- a/RIVaccessories.h
+++ b/RIVaccessories.h
+#ifndef RIVACCESS_H_
+#define RIVACCESS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct treenode{ /* one node of a letter tree keyed on the characters 'a'..'z' */
+	void* data; /* payload stored at the node where a key ends; 0 while unset */
+	
+	struct treenode* links[26]; /* children, indexed by letter - 'a' */
+	int downstream; /* incremented each time an insert visits this node */
+	
+};
+int treecut(struct treenode* node, char* letter);
+void stemInsert(struct treenode* node, char* letter, char* data);
+void RIVinsert(struct treenode* node, char* letter, void* data);
+void* treeSearch(struct treenode* node, char* letter);
+struct treenode* stemTreeSetup();
+
+
+/*isWordClean filters words that contain non-letter characters, and 
+ * upperCase letters, allowing only the '_' symbol through
+ */
+int isWordClean(char* word);
+
+/* used by wordClean */
+int isLetter(char c);
+
+/* creates a standard seed from the characters in a word, hopefully unique */
+int wordtoSeed(char* word);
+
+/* returns 1 for a lower-case ASCII letter or a space, 0 for anything else.
+ * NOTE(review): the comment on isWordClean says '_' is let through, but the
+ * character accepted here is the space (32) — confirm which is intended */
+int isLetter(char c){
+	int lowerCase = (c >= 'a' && c <= 'z');
+	int space = (c == ' ');
+	return (lowerCase || space) ? 1 : 0;
+}
+/* returns 1 when every character isLetter accepts, checking up to the
+ * terminator or 99 characters, whichever comes first; returns 0 at the
+ * first rejected character */
+int isWordClean(char* word){
+	for(int position = 0; position < 99 && word[position]; position++){
+		if(!isLetter(word[position])) return 0;
+	}
+	return 1;
+}
+/* Builds an integer seed from the characters of word: character i is
+ * shifted left by 5*i bits (i.e. read as a base-32 digit) and the results
+ * are summed.
+ *
+ * The arithmetic is done on unsigned int and the shift count is reduced
+ * modulo 32. The previous signed form shifted by 32 bits or more for words
+ * longer than six characters, which is undefined behaviour in C. The
+ * masking is intended to reproduce what that code evaluated to on
+ * hardware that masks variable shift counts (e.g. x86), so existing seeds
+ * should be unchanged -- NOTE(review): confirm against a stored lexicon
+ * before relying on this.
+ * Seeds are not guaranteed unique: long words wrap around.
+ */
+int wordtoSeed(char* word){
+	unsigned int seed = 0;
+	unsigned int shift = 0;
+	for(; *word; word++){
+		/* (int) first so a negative char keeps its sign-extended bit pattern */
+		seed += ((unsigned int)(int)*word) << (shift & 31u);
+		shift += 5;
+	}
+	return (int)seed;
+}
+/* Loads "stemnet2.txt" (whitespace-separated "word stem" pairs) from the
+ * current directory into a new trie, storing each stem under its word via
+ * stemInsert.
+ * Returns the root of the trie, or 0 if the file cannot be opened or the
+ * root cannot be allocated.
+ */
+struct treenode* stemTreeSetup(){
+	FILE* netfile = fopen("stemnet2.txt", "r");
+	if(!netfile){
+		printf("no stemnet file");
+		return 0;
+	}
+	
+	struct treenode* rootNode = calloc(1, sizeof(struct treenode));
+	if(!rootNode){
+		fclose(netfile);
+		return 0;
+	}
+	char word[100];
+	char stem[100];
+	
+	/* fscanf returns EOF (non-zero) at end of file, so the result is
+	 * compared with the number of fields wanted; the %99s widths keep
+	 * over-long tokens from overrunning the 100-byte buffers.
+	 * Stopping on the return value rather than feof() also keeps the last
+	 * pair when the file has no trailing newline. */
+	while(fscanf(netfile, "%99s %99s", word, stem) == 2){
+		stemInsert(rootNode, word, stem);
+	}
+	fclose(netfile);
+	return rootNode;
+}
+/* Looks up the key starting at letter in the trie rooted at node.
+ * Returns the data pointer stored where the key ends (which may be NULL
+ * if the key is only a prefix of stored keys), or NULL when the key is
+ * absent, the root is NULL, or the key contains a character outside
+ * 'a'..'z' (such a character has no slot in links[]).
+ */
+void* treeSearch(struct treenode* node, char* letter){
+	while(node && *letter){
+		int slot = *letter - 'a';
+		if(slot < 0 || slot >= 26){
+			return NULL;
+		}
+		node = node->links[slot];
+		letter++;
+	}
+	if(!node){
+		return NULL;
+	}
+	return node->data;
+}
+/* Stores the caller-owned pointer data under the key starting at letter,
+ * creating nodes as needed. Every node on the path, including the final
+ * one, has its downstream counter incremented. If the key already holds
+ * data the existing pointer is kept (the counters are still incremented,
+ * as before).
+ * Keys containing anything other than 'a'..'z' are rejected before the
+ * trie is touched, because such characters have no slot in links[].
+ */
+void RIVinsert(struct treenode* node, char* letter, void* data){
+	char* probe;
+	if(!node || !letter) return;
+	
+	/* validate the whole key first so a bad key leaves the counters untouched */
+	for(probe = letter; *probe; probe++){
+		if(*probe < 'a' || *probe > 'z') return;
+	}
+	
+	node->downstream++;
+	while(*letter){
+		int slot = *letter - 'a';
+		if(!node->links[slot]){
+			node->links[slot] = calloc(1, sizeof(struct treenode));
+			if(!node->links[slot]) return; /* out of memory: give up on this key */
+		}
+		node = node->links[slot];
+		node->downstream++;
+		letter++;
+	}
+	
+	if(node->data) return;
+	node->data = data;
+}
+/* Stores a private copy of the string data under the key starting at
+ * letter, creating nodes as needed. Every node on the path, including the
+ * final one, has its downstream counter incremented. If the key already
+ * holds data the existing value is kept.
+ * Keys containing anything other than 'a'..'z' are rejected before the
+ * trie is touched, because such characters have no slot in links[].
+ */
+void stemInsert(struct treenode* node, char* letter, char* data){
+	char* probe;
+	if(!node || !letter || !data) return;
+	
+	/* validate the whole key first so a bad key leaves the counters untouched */
+	for(probe = letter; *probe; probe++){
+		if(*probe < 'a' || *probe > 'z') return;
+	}
+	
+	node->downstream++;
+	while(*letter){
+		int slot = *letter - 'a';
+		if(!node->links[slot]){
+			node->links[slot] = calloc(1, sizeof(struct treenode));
+			if(!node->links[slot]) return; /* out of memory: give up on this key */
+		}
+		node = node->links[slot];
+		node->downstream++;
+		letter++;
+	}
+	
+	if(node->data) return;
+	node->data = calloc(strlen(data)+1, sizeof(char));
+	if(!node->data) return;
+	strcpy((char*)node->data, data);
+}
+/* Walks the key starting at letter down from node, decrementing the
+ * downstream counter of each node visited and freeing nodes on the way
+ * back up. Returns 1 when node itself was freed (the caller then clears
+ * its link to it), 0 otherwise.
+ * NOTE(review): the counter is decremented even when the key turns out
+ * not to be present further down -- confirm callers only cut keys that
+ * were inserted.
+ * NOTE(review): node->data is not released here; for tries filled by
+ * stemInsert that looks like a leak -- confirm intended ownership.
+ * NOTE(review): *(letter)-'a' is used as an index without a range check.
+ */
+int treecut(struct treenode* node, char* letter){
+	node->downstream--;
+	int flag;
+	if(*(letter)){
+		if(node->links[*(letter)-'a']){
+		
+			flag = treecut(node->links[*(letter)-'a'], letter+1);
+			/* the child freed itself, so drop the dangling link */
+			if(flag){
+				node->links[*(letter)-'a'] = NULL;
+			}
+		}
+		/* no remaining keys pass through this node */
+		if(!node->downstream){
+			
+			free(node);
+			return 1;
+		}
+	}else{
+		/* end of key: this node is freed unconditionally.
+		 * NOTE(review): if longer keys share this prefix their subtree
+		 * would be orphaned -- verify. */
+		
+		free(node);
+		return 1;
+	}
+	return 0;
+	
+	
+}
+
+#endif
--- a/RIVaccessories.h.gch
+++ b/RIVaccessories.h.gch
--- a/RIVcentroids.c
+++ b/RIVcentroids.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#define RIVSIZE 5
+#define CACHESIZE 0
+#define THRESHOLD 0.70
+#include "RIVtoolsCPUlinux.h"
+
+void getcentroids(sparseRIV* centroids, sparseRIV* vectorSet, int centroidCount, int vectorCount);
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* Loads every file under the directory given as argv[1] into sparse
+ * vectors, builds five seed-word centroids and hands both to
+ * getcentroids, printing timing information.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	char rootString[2000];
+	if(argc <2){ 
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	RIVInit();
+	/* starts as a one-element array; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		return 1;
+	}
+
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	getMagnitudes(fileRIVs, fileCount); 
+
+	clock_t beginnsquared = clock();
+	sparseRIV centroids[5];
+	strcpy(centroids[0].name, "boobs");
+	strcpy(centroids[1].name, "ass");
+	strcpy(centroids[2].name, "shit");
+	strcpy(centroids[3].name, "cocks");
+	strcpy(centroids[4].name, "fuck");
+	/* each seed word is replaced by the vector wordtoL2 builds for it */
+	for(int i=0; i<5; i++){
+		centroids[i] = wordtoL2(centroids[i].name);
+	}
+	getMagnitudes(centroids, 5);
+	getcentroids(centroids, fileRIVs, 5, fileCount);
+	
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("nsquared time:%lf\n\n", time);
+	printf("%d <", RIVKey.thing);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+	free(fileRIVs);
+	
+return 0;
+}
+/* Recursively walks rootString (which must end in "/"), converting every
+ * regular entry into a sparseRIV with fileToL2Clean and appending it to
+ * the realloc-grown array *fileRIVs; *fileCount is advanced for each
+ * vector added. Entries whose name starts with '.' are skipped.
+ * Stops walking the current directory if a file cannot be opened or
+ * memory runs out.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+	
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+	
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+	
+		/* +2 leaves room for a trailing "/" and the terminator */
+		if(strlen(rootString) + strlen(files->d_name) + 2 > sizeof(pathString)){
+			printf("path too long, skipping %s%s\n", rootString, files->d_name);
+			continue;
+		}
+		strcpy(pathString, rootString);
+		strcat(pathString, files->d_name);
+		
+		if(files->d_type == DT_DIR){
+			strcat(pathString, "/");
+			directoryToL2s(pathString, fileRIVs, fileCount);
+			/* a directory is walked, never opened as a document */
+			continue;
+		}
+
+		FILE *input = fopen(pathString, "r");
+		if(!input){
+			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
+			break;
+		}
+		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+		if(!grown){
+			printf("out of memory while loading %s\n", pathString);
+			fclose(input);
+			break;
+		}
+		(*fileRIVs) = grown;
+		
+		(*fileRIVs)[(*fileCount)] = fileToL2Clean(input);
+		strcpy((*fileRIVs)[(*fileCount)].name, pathString);
+		
+		fclose(input);
+		(*fileCount)++;
+	}
+	/* release the directory handle on every exit path out of the loop */
+	closedir(directory);
+}
+/* Assigns each of the vectorCount vectors in vectorSet to one of the
+ * centroidCount centroids, then for every centroid sums its member
+ * vectors into a dense accumulator, printing the accumulator after each
+ * addition.
+ *
+ * cosines[j][i] is whatever cosineCompare reports for centroid j against
+ * vector i. A vector is assigned to the centroid with the SMALLEST
+ * absolute value. NOTE(review): that is what the original comparison
+ * does; if "closest centroid" was intended the test should probably pick
+ * the largest cosine -- confirm before changing.
+ */
+void getcentroids(sparseRIV* centroids, sparseRIV* vectorSet, int centroidCount, int vectorCount){
+	if(centroidCount <= 0 || vectorCount <= 0) return;
+	
+	float** cosines = malloc(centroidCount*sizeof(float*));
+	if(!cosines) return;
+	
+	for(int i=0; i<centroidCount; i++){
+		cosines[i] = cosineCompare(centroids[i], vectorSet, vectorCount);
+	}
+	int* centroidIndexes[centroidCount];
+	int indexCounts[centroidCount];
+	int* denses[centroidCount];
+	/* one contiguous allocation each, carved into per-centroid rows below;
+	 * calloc leaves both the index lists and the accumulators zeroed */
+	centroidIndexes[0] = calloc((size_t)vectorCount*centroidCount, sizeof(int));
+	denses[0] = calloc((size_t)RIVKey.RIVsize*centroidCount, sizeof(int));
+	if(!centroidIndexes[0] || !denses[0]){
+		free(centroidIndexes[0]);
+		free(denses[0]);
+		free(cosines);
+		return;
+	}
+	for(int i=0; i<centroidCount; i++){
+		/* member counts must start at zero before they are used as indexes */
+		indexCounts[i] = 0;
+		centroidIndexes[i] = centroidIndexes[0]+i*vectorCount;
+		denses[i] = denses[0] +i*RIVKey.RIVsize;
+	}
+	
+	float token = 2.0;
+	int counter = 0;
+	for(int i=0; i<vectorCount; i++){
+		token = 2.0;
+		printf("\nfor vector %d:\n", i);
+		for(int j = 0; j<centroidCount; j++){
+			printf("centroid %d: %f", j, cosines[j][i]);
+			if(fabsf(cosines[j][i])< token){
+				token = fabsf(cosines[j][i]);
+				counter = j;
+			}
+		}
+		centroidIndexes[counter][indexCounts[counter]] = i;
+		indexCounts[counter] += 1;
+	}
+
+	for(int i=0; i<centroidCount; i++){
+		printf("\n\nnumber %d\n", i);
+		for(int j=0; j<indexCounts[i]; j++){
+			/* add the j-th MEMBER of centroid i, not the j-th vector overall */
+			addS2D(denses[i], vectorSet[centroidIndexes[i][j]]);
+			for(int k=0; k<RIVKey.RIVsize; k++){
+				printf("%d, ", denses[i][k]);
+			}
+		}
+	}
+	
+	/* NOTE(review): the rows returned by cosineCompare are not released
+	 * here because their ownership is not visible from this file. */
+	free(centroidIndexes[0]);
+	free(denses[0]);
+	free(cosines);
+}
--- a/RIVclasses.c
+++ b/RIVclasses.c
+#include <stdio.h>
+#define RIVSIZE 50000
+#include "RIVtools.h"
+char* clean(char* word);
+char* stemmy(struct treenode* searchRoot, char* word);
+sparseRIV line2L3(char* text, struct treenode* searchRoot);
+/* fixed-size character buffer holding one class label */
+typedef char label[200];
+/* one labelled class: its name plus the vectors collected for it */
+struct RIVclass{
+	label name;
+	sparseRIV* set;	/* array of vectors, grown with realloc in main */
+	int setSize;	/* number of vectors currently stored in set */
+};
+LEXICON* lexicon;
+/* Reads "text<TAB>label" records from ../../Downloads/labeledText.tsv,
+ * converts each text to a sparse vector with line2L3 and groups the
+ * vectors by label, then prints every label with its vector count.
+ * Returns 1 if the lexicon or the input file cannot be opened.
+ *
+ * NOTE(review): %s stops at the first whitespace, so "text" is a single
+ * token here; if the text column can contain spaces the format needs to
+ * become "%[^\t]" -- confirm against the data file.
+ */
+int main(){
+	struct treenode* searchRoot = stemTreeSetup();
+	lexicon = lexOpen("consolidatedLexicon", "rx");
+	if(!lexicon){
+		puts("no lexicon");
+		return 1;
+	}
+	
+	label* classNames = NULL;
+	struct RIVclass* classes = NULL;
+	int classCount = 0;
+	
+	FILE* textSet = fopen("../../Downloads/labeledText.tsv", "r");
+	if(!textSet){
+		puts("no file");
+		lexClose(lexicon);
+		return 1;
+	}
+	char text[20000];
+	label className;
+	/* fscanf returns EOF (non-zero) at end of file, so compare against the
+	 * number of fields wanted; the widths match the buffer sizes */
+	while(fscanf(textSet, "%19999s\t%199s", text, className) == 2){
+		
+		/* find this label among the classes seen so far */
+		int classNo = -1;
+		for(int i=0; i<classCount; i++){
+			if(!strcmp(classNames[i], className)){
+				classNo = i;
+				break;
+			}
+		}
+		if(classNo < 0){
+			/* grow both parallel arrays by one and start an empty class */
+			label* grownNames = realloc(classNames, (classCount+1)*sizeof(label));
+			if(!grownNames) break;
+			classNames = grownNames;
+			
+			struct RIVclass* grownClasses = realloc(classes, (classCount+1)*sizeof(struct RIVclass));
+			if(!grownClasses) break;
+			classes = grownClasses;
+			
+			strcpy(classNames[classCount], className);
+			strcpy(classes[classCount].name, className);
+			classes[classCount].set = NULL;
+			classes[classCount].setSize = 0;
+			
+			classNo = classCount;
+			classCount++;
+		}
+		
+		struct RIVclass* entry = classes+classNo;
+		sparseRIV* grownSet = realloc(entry->set, (entry->setSize+1) *sizeof(sparseRIV));
+		if(!grownSet) break;
+		entry->set = grownSet;
+		entry->set[entry->setSize] = line2L3(text, searchRoot);
+		entry->setSize++;
+		
+	}
+	fclose(textSet);
+	
+	for(int i=0; i<classCount; i++){
+		puts(classNames[i]);
+		printf("%d\n\n", classes[i].setSize);
+	}
+	
+	lexClose(lexicon);
+	return 0;
+	
+}
+/* Strips word in place down to its letters: upper-case ASCII letters are
+ * lower-cased, lower-case letters are kept and every other character is
+ * dropped. Returns word, which may end up empty.
+ *
+ * The filtering is done directly in the caller's buffer (the result is
+ * never longer than the input), so words of any length are safe; the
+ * previous 100-byte scratch array overflowed on longer inputs.
+ */
+char* clean(char* word){
+	char* writeHead = word;
+	for(char* readHead = word; *readHead; readHead++){
+		if(*readHead >= 'A' && *readHead <= 'Z'){
+			*writeHead++ = *readHead + 32;
+		}else if(*readHead >= 'a' && *readHead <= 'z'){
+			*writeHead++ = *readHead;
+		}
+	}
+	*writeHead = '\0';
+	return word;
+}
+	
+/* Returns whatever treeSearch finds for word in the trie at searchRoot,
+ * viewed as a string; NULL when treeSearch reports nothing. */
+char* stemmy(struct treenode* searchRoot, char* word){
+	char* stem = treeSearch(searchRoot , word);
+	return stem;
+}
+		
+/* Builds one sparse vector for a line of text: each whitespace-separated
+ * token is cleaned, mapped to its stem through searchRoot, pulled from
+ * the global lexicon and added into a dense accumulator, which is
+ * consolidated into the returned sparseRIV. Tokens with no letters, no
+ * stem or no lexicon entry are ignored.
+ */
+sparseRIV line2L3(char* text, struct treenode* searchRoot){
+	denseRIV accumulate = {0};
+	sparseRIV temp;
+	char* textEnd = text+strlen(text);
+	char word[100];
+	int displacement;
+	while(text<textEnd){
+		/* when only whitespace is left sscanf stores nothing, so neither
+		 * word nor displacement may be trusted unless it reports 1 */
+		displacement = 0;
+		if(sscanf(text, "%99s%n", word, &displacement) != 1 || !displacement){
+			break;
+		}
+		/* step over the token and the single delimiter that follows it */
+		text += displacement+1;
+		
+		if(!*clean(word)) continue;
+		char* stem = stemmy(searchRoot, word);
+		if(!stem) continue;
+		
+		denseRIV* wordRIV = lexPull(lexicon, stem);
+		if(!wordRIV){
+			//printf("%s, not in lexicon\n", stem);
+			continue;
+		}
+		//printf("%s, succesfully pulled\n", stem);
+		temp = consolidateD2S(wordRIV->values);
+		addS2D(accumulate.values, temp);
+		
+		free(temp.locations);
+		free(wordRIV);
+	}
+	temp = consolidateD2S(accumulate.values);
+	return temp;
+}
+		
+		
+		
+		 
+	
+	
+	
+	
+
--- a/RIVcompare.c
+++ b/RIVcompare.c
+#include <stdio.h>
+#include "RIVtools.h"
+#include <dirent.h>
+#include <sys/types.h>
+#include <time.h>
+/* For every word listed in wordList.txt: prints it, pulls its dense
+ * vector from the lexicon, consolidates it to sparse form and releases
+ * both. Returns 1 if wordList.txt cannot be opened.
+ */
+int main(){
+
+	lexOpen("/home/drbob/Documents/lexicon");
+	FILE *wordList = fopen("wordList.txt", "r");
+	if(!wordList){
+		puts("no wordList.txt");
+		lexClose();
+		return 1;
+	}
+	char word[100];
+	denseRIV accept;
+	sparseRIV analyzefloor;
+	sparseRIV analyzerounded;
+	sparseRIV other;
+	/* stop on the fscanf result, not feof(): that keeps the last word when
+	 * the file has no trailing newline; %99s matches the buffer size */
+	while(fscanf(wordList, "%99s", word) == 1){
+		puts(word);
+//		sleep(1);
+		accept = lexPull(word);
+
+		other = consolidateD2S(accept.values);
+		//other.magnitude = getMagnitudeSparse(other);
+//		accept.magnitude = other.magnitude;
+//		analyzerounded = normalize(accept, 2000);
+//		analyzefloor = normalizeFloored(accept, 2000);
+//		if(cosCompare(accept, analyzefloor)>1.00){
+//		printf("floored: %f rounded: %f\tcontextSize: %d\tfrequency: %d\tsaturationbase %d, saturationFloored %d, saturationRounded %d\n", analyzefloor.magnitude, analyzerounded.magnitude, *(accept.contextSize), *(accept.frequency), other.count, analyzefloor.count, analyzerounded.count);
+////}
+		
+//		free(analyzefloor.locations);
+//		free(analyzerounded.locations);
+		free(other.locations);
+		
+		free(accept.values);
+	}
+	/* only referenced by the disabled experiment above */
+	(void)analyzefloor;
+	(void)analyzerounded;
+	fclose(wordList);
+	lexClose();
+	return 0;
+
+}
+
--- a/RIVconsolidate.c
+++ b/RIVconsolidate.c
+#include <stdio.h>
+#define RIVSIZE 50000
+#define CACHESIZE 0
+#include "RIVtools.h"
+#include <dirent.h>
+
+/* Reads every vector of the lexicon directory argv[1], normalises it and
+ * writes the results into a new lexicon named "consolidatedLexicon".
+ * For each entry a CSV line "frequency,contextSize,magnitude,i,j" is
+ * printed, where i counts vectors kept and j counts entries seen.
+ * Returns 1 on a missing argument or unreadable directory.
+ */
+int main(int argc, char* argv[]){
+	if(argc < 2){
+		printf("give me a lexicon directory\n");
+		return 1;
+	}
+	LEXICON* lexicon = lexOpen(argv[1], "rx");
+	denseRIV* intake;
+	sparseRIV examine;
+	static denseRIV *output[60000] = {0};
+	const int outputCapacity = (int)(sizeof(output)/sizeof(output[0]));
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(argv[1]))){
+		printf("location not found, %s\n", argv[1]);
+		return 1;
+	}
+	int i=0;
+	int j=0;
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+
+		if(files->d_type == DT_DIR){
+			/* the lexicon should not have valid sub-directories */
+			continue;
+		}
+		/* output[] is a fixed table; stop rather than write past its end */
+		if(i >= outputCapacity){
+			printf("more than %d vectors, remaining entries ignored\n", outputCapacity);
+			break;
+		}
+		j++;
+		intake = lexPull(lexicon, files->d_name);
+		if(!intake){
+			/* nothing could be pulled for this entry; skip it */
+			continue;
+		}
+		/* if the vector has been encountered more than MINSIZE times
+		 * then it should be statistically significant, and useful */
+		/*if(intake->contextSize<7000){
+			free(intake);
+			continue;
+		}*/
+		examine = normalize(*intake, 10000);
+		strcpy(examine.name, files->d_name);
+		printf("%d,%d,%lf,%d,%d\n", examine.frequency, examine.contextSize, examine.magnitude, i, j);
+		output[i] = calloc(1, sizeof(denseRIV));
+		if(!output[i]){
+			free(intake);
+			free(examine.locations);
+			break;
+		}
+		addS2D(output[i]->values, examine);
+		output[i]->magnitude = examine.magnitude;
+		strcpy(output[i]->name, files->d_name);
+		output[i]->frequency = intake->frequency;
+		output[i]->contextSize = intake->contextSize;
+		free(intake);
+		free(examine.locations);
+		i++;
+	}
+	closedir(directory);
+	lexClose(lexicon);
+	lexicon = lexOpen("consolidatedLexicon", "wx");
+	/* NOTE(review): the output[] vectors are not freed after lexPush
+	 * because it is not visible here whether the lexicon keeps them. */
+	for(int k=0; k<i; k++){
+		lexPush(lexicon, output[k]);
+	}
+	lexClose(lexicon);
+	return 0;
+}
--- a/RIVcull.c
+++ b/RIVcull.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#include "RIVtools.h"
+#define THRESHOLD 0.70
+/* this program identifies all near-duplicates among the documents in the 
+ * chosen root directory, using RIV comparison */
+
+// fills the fileRIVs array with a vector for each file in the root directory
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* Reports near-duplicate documents under the directory given as argv[1].
+ * Every file becomes a sparse vector; each vector is compared with all
+ * earlier ones whose magnitude lies within +/-15% of its own, and pairs
+ * whose cosine exceeds THRESHOLD are printed.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	
+	int fileCount = 0;
+	char rootString[2000];
+	if(argc <2){ 
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	//initializes the fileRIVs array, to be realloc'ed by directoryToL2s
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		return 1;
+	}
+
+	//gather all vectors into the fileRIVs array and count them in fileCount
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	
+	//first calculate all magnitudes for later use
+	for(int i = 0; i < fileCount; i++){
+		fileRIVs[i].magnitude = getMagnitudeSparse(fileRIVs[i]);
+		
+	}
+	clock_t begintotal = clock();
+	double cosine;
+	double minmag;
+	double maxmag;
+	
+	//all cosines need a sparse-dense comparison, so we will create a
+	//single dense vector and re-fill it for each base document
+	denseRIV baseDense;
+		
+	for(int i = 0; i < fileCount; i++){
+		
+		//0 out the denseVector, and map the next sparseVector to it
+		memset(&baseDense, 0, sizeof(denseRIV));
+		addS2D(baseDense.values, fileRIVs[i]);
+		
+		//pass magnitude to the dense vector
+		baseDense.magnitude = fileRIVs[i].magnitude;
+		
+		//if these two vectors are too different in size, we can know that they are not duplicates
+		minmag = baseDense.magnitude*.85;
+		maxmag  = baseDense.magnitude*1.15;
+		for(int j = 0; j < i; j++){
+			//if this vector is within magnitude threshold
+			if(fileRIVs[j].magnitude < maxmag 
+			&& fileRIVs[j].magnitude > minmag){
+				
+				//identify the similarity of these two vectors
+				cosine = cosCompare(baseDense, fileRIVs[j]);
+								
+		
+				//if the two are similar enough to be flagged
+				if(cosine>THRESHOLD){
+					printf("%s\t%s\n%f\n", fileRIVs[i].name , fileRIVs[j].name, cosine);
+				}	
+			}
+		}
+	}
+	printf("fileCount: %d", fileCount);
+	free(fileRIVs);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+return 0;
+}
+
+//mostly a standard recursive Dirent-walk
+/* Recursive dirent walk over rootString (which must end in "/"): every
+ * file that can be opened is converted with fileToL2 and appended to the
+ * realloc-grown array *fileRIVs, and *fileCount is advanced. Entries
+ * whose name starts with '.' are skipped; files that cannot be opened
+ * are silently ignored.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+/* *** begin Dirent walk *** */
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+		
+		if(!files->d_name[0]) break;
+		/* skip dot entries by going round the outer loop again, so that a
+		 * NULL from readdir at the end of the directory is never dereferenced */
+		if(*(files->d_name)=='.') continue;
+		
+		/* +2 leaves room for a trailing "/" and the terminator */
+		if(strlen(rootString) + strlen(files->d_name) + 2 > sizeof(pathString)){
+			printf("path too long, skipping %s%s\n", rootString, files->d_name);
+			continue;
+		}
+		strcpy(pathString, rootString);
+		strcat(pathString, files->d_name);
+		
+		if(files->d_type == DT_DIR){
+			strcat(pathString, "/");
+			directoryToL2s(pathString, fileRIVs, fileCount);
+			continue;
+		}
+
+/* *** end dirent walk, begin meat of function  *** */
+
+		FILE *input = fopen(pathString, "r");
+		if(input){
+			sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+			if(!grown){
+				printf("out of memory while loading %s\n", pathString);
+				fclose(input);
+				break;
+			}
+			*fileRIVs = grown;
+			
+			(*fileRIVs)[*fileCount] = fileToL2(input);
+			strcpy((*fileRIVs)[*fileCount].name, pathString);
+			
+			fclose(input);
+			 *fileCount += 1;
+		}
+	}
+	/* release the directory handle */
+	closedir(directory);
+}
--- a/RIVcullCPUlinux.c
+++ b/RIVcullCPUlinux.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#define RIVSIZE 25000
+#define CACHESIZE 0
+#define NONZEROS 2
+#define THRESHOLD 0.70
+#include "RIVtools.h"
+
+
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* Near-duplicate scan over the directory given as argv[1]. Each vector
+ * is compared with every earlier vector that is still flagged (boolean
+ * non-zero) and whose magnitude lies within +/-15% of its own; pairs
+ * whose cosine exceeds THRESHOLD are printed and the earlier vector is
+ * un-flagged. Prints the number of cosines computed, the number of
+ * matches and timings.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	//RIVInit();
+	char rootString[2000];
+	if(argc <2){ 
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	/* starts as a one-element array; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		return 1;
+	}
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	
+	sparseRIV* fileRIVs_slider = fileRIVs;
+	sparseRIV* fileRIVs_stop = fileRIVs+fileCount;
+	while(fileRIVs_slider <fileRIVs_stop){
+		(*fileRIVs_slider).magnitude = getMagnitudeSparse(*fileRIVs_slider);
+		fileRIVs_slider++;
+		
+	}
+	
+	clock_t beginnsquared = clock();
+	float cosine;
+	float minmag;
+	float maxmag;
+	denseRIV baseDense;
+	baseDense.values = malloc(RIVSIZE*sizeof(int));
+	if(!baseDense.values){
+		printf("out of memory\n");
+		free(fileRIVs);
+		return 1;
+	}
+	fileRIVs_slider = fileRIVs;
+	sparseRIV* comparators_slider;
+	int thing = 0;
+	int count = 0;
+	while(fileRIVs_slider<fileRIVs_stop){
+		comparators_slider = fileRIVs;
+		/* rebuild the dense form of the current base vector */
+		memset(baseDense.values, 0, RIVSIZE*sizeof(int));
+		baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
+		baseDense.magnitude = (*fileRIVs_slider).magnitude;
+		minmag = baseDense.magnitude*.85;
+		maxmag  = baseDense.magnitude*1.15;
+		while(comparators_slider < fileRIVs_slider){
+			if((*comparators_slider).magnitude < maxmag && (*comparators_slider).magnitude > minmag && (*comparators_slider).boolean){
+				cosine = cosCompare(baseDense, *comparators_slider);
+				
+				count++;
+				if(cosine>THRESHOLD){
+					printf("%s\t%s\n%f\n", (*fileRIVs_slider).name , (*comparators_slider).name, cosine);
+					(*comparators_slider).boolean = 0; 
+					thing++; 
+				}
+				
+			}
+			comparators_slider++;
+		}
+		
+		
+		fileRIVs_slider++;
+		
+	}
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("\nnsquared time:%lf\n\n", time);
+	printf("\ncosines: %d \n", count);
+	printf("\nsims: %d \n", thing);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+	free(baseDense.values);
+	free(fileRIVs);
+	
+return 0;
+}
+/* Recursively walks rootString (which must end in "/"), converting every
+ * regular entry into a sparseRIV with fileToL2 and appending it to the
+ * realloc-grown array *fileRIVs; *fileCount is advanced for each vector
+ * added. Entries whose name starts with '.' are skipped.
+ * Stops walking the current directory if a file cannot be opened or
+ * memory runs out.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+	
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+	
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+	
+		/* +2 leaves room for a trailing "/" and the terminator */
+		if(strlen(rootString) + strlen(files->d_name) + 2 > sizeof(pathString)){
+			printf("path too long, skipping %s%s\n", rootString, files->d_name);
+			continue;
+		}
+		strcpy(pathString, rootString);
+		strcat(pathString, files->d_name);
+		
+		if(files->d_type == DT_DIR){
+			strcat(pathString, "/");
+			directoryToL2s(pathString, fileRIVs, fileCount);
+			/* a directory is walked, never opened as a document */
+			continue;
+		}
+
+		FILE *input = fopen(pathString, "r");
+		if(!input){
+			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
+			break;
+		}
+		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+		if(!grown){
+			printf("out of memory while loading %s\n", pathString);
+			fclose(input);
+			break;
+		}
+		(*fileRIVs) = grown;
+		
+		(*fileRIVs)[(*fileCount)] = fileToL2(input);
+		strcpy((*fileRIVs)[(*fileCount)].name, pathString);
+		
+		fclose(input);
+		(*fileCount)++;
+	}
+	/* release the directory handle on every exit path out of the loop */
+	closedir(directory);
+}
--- a/RIVcullDestructive.c
+++ b/RIVcullDestructive.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#define RIVSIZE 25000
+#define CACHESIZE 0
+#define NONZEROS 2
+#define THRESHOLD 0.99
+#include "RIVtools.h"
+
+
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* Destructive near-duplicate cull over the directory given as argv[1].
+ * Each vector is compared with every earlier vector that is still
+ * flagged (boolean non-zero) and whose magnitude lies within +/-15% of
+ * its own; when the cosine exceeds THRESHOLD the EARLIER file is deleted
+ * from disk with remove() and un-flagged.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	char rootString[2000];
+	if(argc <2){
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	/* starts as a one-element array; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		return 1;
+	}
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	
+	sparseRIV* fileRIVs_slider = fileRIVs;
+	sparseRIV* fileRIVs_stop = fileRIVs+fileCount;
+	while(fileRIVs_slider <fileRIVs_stop){
+		(*fileRIVs_slider).magnitude = getMagnitudeSparse(*fileRIVs_slider);
+		fileRIVs_slider++;
+		
+	}
+	
+	clock_t beginnsquared = clock();
+	int thing = 0;
+	float cosine;
+	float minmag;
+	float maxmag;
+	denseRIV baseDense;
+	baseDense.values = malloc(RIVSIZE*sizeof(int));
+	if(!baseDense.values){
+		printf("out of memory\n");
+		free(fileRIVs);
+		return 1;
+	}
+	fileRIVs_slider = fileRIVs;
+	sparseRIV* comparators_slider;
+	int count = 0;
+	while(fileRIVs_slider<fileRIVs_stop){
+		comparators_slider = fileRIVs;
+		/* rebuild the dense form of the current base vector */
+		memset(baseDense.values, 0, RIVSIZE*sizeof(int));
+		baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
+		baseDense.magnitude = (*fileRIVs_slider).magnitude;
+		minmag = baseDense.magnitude*.85;
+		maxmag  = baseDense.magnitude*1.15;
+		while(comparators_slider < fileRIVs_slider){
+			if((*comparators_slider).magnitude < maxmag && (*comparators_slider).magnitude > minmag && (*comparators_slider).boolean){
+				cosine = cosCompare(baseDense, *comparators_slider);
+				
+				count++;
+				if(cosine>THRESHOLD){
+					printf("%s\t%f\n",(*comparators_slider).name, cosine);
+					/* remove() returns non-zero when the file could not be deleted */
+					if(remove((*comparators_slider).name)){
+						printf("   well shit");
+					}
+					(*comparators_slider).boolean = 0; 
+					thing++; 
+				}
+				
+			}
+			comparators_slider++;
+		}
+		
+		
+		fileRIVs_slider++;
+		
+	}
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("\nnsquared time:%lf\n\n", time);
+	printf("\ncosines: %d \n", count);
+	printf("\nsims: %d \n", thing);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+	free(baseDense.values);
+	free(fileRIVs);
+	
+return 0;
+}
+/* Recursively walks rootString (which must end in "/"), printing each
+ * file path and converting every regular entry into a sparseRIV with
+ * fileToL2, appended to the realloc-grown array *fileRIVs; *fileCount is
+ * advanced for each vector added. Entries whose name starts with '.' are
+ * skipped.
+ * Stops walking the current directory if a file cannot be opened or
+ * memory runs out.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+	
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+	
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+	
+		/* +2 leaves room for a trailing "/" and the terminator */
+		if(strlen(rootString) + strlen(files->d_name) + 2 > sizeof(pathString)){
+			printf("path too long, skipping %s%s\n", rootString, files->d_name);
+			continue;
+		}
+		strcpy(pathString, rootString);
+		strcat(pathString, files->d_name);
+		
+		if(files->d_type == DT_DIR){
+			strcat(pathString, "/");
+			directoryToL2s(pathString, fileRIVs, fileCount);
+			/* a directory is walked, never opened as a document */
+			continue;
+		}
+
+		FILE *input = fopen(pathString, "r");
+		puts(pathString);
+		if(!input){
+			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
+			break;
+		}
+		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+		if(!grown){
+			printf("out of memory while loading %s\n", pathString);
+			fclose(input);
+			break;
+		}
+		(*fileRIVs) = grown;
+		
+		(*fileRIVs)[(*fileCount)] = fileToL2(input);
+		strcpy((*fileRIVs)[(*fileCount)].name, pathString);
+		
+		fclose(input);
+		(*fileCount)++;
+	}
+	/* release the directory handle on every exit path out of the loop */
+	closedir(directory);
+}
--- a/RIVcullGPU.cu
+++ b/RIVcullGPU.cu
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#define RIVSIZE 25000
+#define CACHESIZE 0
+#define NONZEROS 2
+#define THRESHOLD 0.70
+#include "RIVtools.h"
+#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
+
+/* Backs the HANDLE_ERROR macro: does nothing on cudaSuccess; otherwise
+ * prints the CUDA error string with the reporting file and line, then
+ * terminates the process with EXIT_FAILURE. */
+static void HandleError(cudaError_t err, const char *file, int line){
+	if(err == cudaSuccess){
+		return;
+	}
+	const char *message = cudaGetErrorString(err);
+	printf("%s in %s at line %d\n", message, file, line);
+	exit(EXIT_FAILURE);
+}
+
+
+/* Scatters one sparse vector into a dense one, one thread per non-zero.
+ * d_sparseSlot is read as count location indexes followed by count
+ * values; thread id writes value[id] to d_denseSlot[location[id]].
+ * Launch with a 1-D grid of at least count threads; surplus threads
+ * exit immediately.
+ * NOTE(review): locations are not range-checked against the dense
+ * vector's length, which this kernel does not receive.
+ */
+__global__ void d_mapS2D(int *d_denseSlot, int *d_sparseSlot, int count){
+	int id = blockIdx.x*blockDim.x + threadIdx.x;
+	/* the original test `!id<count` parsed as `(!id) < count`, which made
+	 * every thread return for any count of two or more */
+	if(id >= count) return;
+	int *target = d_sparseSlot+id;
+	d_denseSlot[*target] = *(target+count);
+}
+
+/* One thread per sparse vector: computes the integer dot product of
+ * vector id with the dense base vector and stores it in output[id].
+ * Each record is read as [count][count locations][count values] starting
+ * at d_sparseBlock + RIVSIZE*id. Launch with a 1-D grid of at least
+ * RIVcount threads.
+ * NOTE(review): the RIVSIZE stride assumes fixed-size records, whereas
+ * cosineMatrix in this file packs records back to back -- one of the two
+ * has to change before this kernel is launched.
+ */
+__global__ void cosines(int* d_denseBase, int* d_sparseBlock, int* output, int RIVcount){
+	int id =blockIdx.x*blockDim.x + threadIdx.x;
+	if(id>=RIVcount) return;
+	/* point INTO the device record; the original took the address of a
+	 * local copy of count, so locations/values read unrelated memory */
+	const int *record = d_sparseBlock + (size_t)RIVSIZE*id;
+	int count = record[0];
+	const int *locations = record+1;
+	const int *values = locations+count;
+	int dot = 0;
+	for(int k = 0; k < count; k++){
+		dot += values[k]*d_denseBase[locations[k]];
+	}
+	output[id] = dot;
+}
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+float** cosineMatrix(sparseRIV* RIVs, int RIVcount);
+/* Loads every file under the directory given as argv[1] into sparse
+ * vectors, computes their magnitudes and hands them to cosineMatrix,
+ * printing timing information.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	char rootString[2000];
+	if(argc <2){ 
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	RIVInit();
+	/* starts as a one-element array; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		return 1;
+	}
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	
+	sparseRIV* fileRIVs_slider = fileRIVs;
+	sparseRIV* fileRIVs_stop = fileRIVs+fileCount;
+	while(fileRIVs_slider <fileRIVs_stop){
+		(*fileRIVs_slider).magnitude = getMagnitudeSparse(*fileRIVs_slider);
+		fileRIVs_slider++;
+		
+	}
+	
+	clock_t beginnsquared = clock();
+	/* nothing is counted on the host yet; kept so the report below is unchanged */
+	int count = 0;
+	
+	/* the returned matrix is not used yet */
+	cosineMatrix(fileRIVs, fileCount);
+	
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("\nnsquared time:%lf\n\n", time);
+	printf("\ncosines: %d \n", count);
+	printf("\nsims: %d \n", RIVKey.thing);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+	free(fileRIVs);
+	
+return 0;
+}
+/* Recursively walks rootString (which must end in "/"), converting every
+ * regular entry into a sparseRIV with fileToL2 and appending it to the
+ * realloc-grown array *fileRIVs; *fileCount is advanced for each vector
+ * added. Entries whose name starts with '.' are skipped.
+ * Stops walking the current directory if a file cannot be opened or
+ * memory runs out.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+	
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+	
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+	
+		/* +2 leaves room for a trailing "/" and the terminator */
+		if(strlen(rootString) + strlen(files->d_name) + 2 > sizeof(pathString)){
+			printf("path too long, skipping %s%s\n", rootString, files->d_name);
+			continue;
+		}
+		strcpy(pathString, rootString);
+		strcat(pathString, files->d_name);
+		
+		if(files->d_type == DT_DIR){
+			strcat(pathString, "/");
+			directoryToL2s(pathString, fileRIVs, fileCount);
+			/* a directory is walked, never opened as a document */
+			continue;
+		}
+
+		FILE *input = fopen(pathString, "r");
+		if(!input){
+			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
+			break;
+		}
+		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+		if(!grown){
+			printf("out of memory while loading %s\n", pathString);
+			fclose(input);
+			break;
+		}
+		(*fileRIVs) = grown;
+		
+		(*fileRIVs)[(*fileCount)] = fileToL2(input);
+		strcpy((*fileRIVs)[(*fileCount)].name, pathString);
+		
+		fclose(input);
+		(*fileCount)++;
+	}
+	/* release the directory handle on every exit path out of the loop */
+	closedir(directory);
+}
+/* Work in progress: stages the sparse vectors on the device for a later
+ * all-pairs cosine computation. One device block of 100000000 ints is
+ * laid out as
+ *   [RIVcount*RIVcount/2 output ints][RIVSIZE dense ints][packed records]
+ * where each record is [count][count*2 ints copied from locations].
+ * No kernel is launched and no result is produced yet, so the block is
+ * released again and NULL is returned.
+ * NOTE(review): copying count*2 ints from RIVs[i].locations assumes the
+ * values are stored directly after the locations -- confirm against the
+ * sparseRIV definition.
+ */
+float** cosineMatrix(sparseRIV* RIVs, int RIVcount){
+	const size_t blockInts = 100000000;
+	int *d_massiveBlock = NULL;
+	HANDLE_ERROR (cudaMalloc((void**)&d_massiveBlock, blockInts*sizeof(int)));
+	
+	/* size_t arithmetic: RIVcount*RIVcount overflows int above ~46k vectors */
+	size_t outputInts = (size_t)RIVcount*RIVcount/2;
+	size_t usedInts = outputInts + RIVSIZE;
+	if(RIVcount < 0 || usedInts > blockInts){
+		printf("too many vectors for the device block: %d\n", RIVcount);
+		HANDLE_ERROR (cudaFree(d_massiveBlock));
+		return NULL;
+	}
+	int *d_denseSlot = d_massiveBlock+outputInts;
+	int *d_sparseSection =d_denseSlot+RIVSIZE;
+	int *d_sparse_slider = d_sparseSection;
+	for(int i=0; i<RIVcount; i++){
+		size_t recordInts = 1 + (size_t)RIVs[i].count*2;
+		/* never copy past the end of the device allocation */
+		if(usedInts + recordInts > blockInts){
+			printf("device block full after %d vectors\n", i);
+			break;
+		}
+		HANDLE_ERROR (cudaMemcpy (d_sparse_slider++, &RIVs[i].count, sizeof(int), cudaMemcpyHostToDevice));
+		HANDLE_ERROR (cudaMemcpy (d_sparse_slider, RIVs[i].locations, RIVs[i].count*2*sizeof(int), cudaMemcpyHostToDevice));
+		d_sparse_slider+=RIVs[i].count*2;
+		usedInts += recordInts;
+	}
+	
+	/* nothing consumes the staged data yet; release it rather than leak it */
+	HANDLE_ERROR (cudaFree(d_massiveBlock));
+	return NULL;
+}
--- a/RIVgraphout.c
+++ b/RIVgraphout.c
+#include <stdio.h>
+#define RIVSIZE 50000
+#define CACHESIZE 0
+#include "RIVtools.h"
+#include <dirent.h>
+
+/* Prints one CSV line "frequency,contextSize,magnitude,index,name" for
+ * every vector stored in the lexicon directory given as argv[1].
+ * Returns 1 on a missing argument or unreadable directory, 0 otherwise.
+ */
+int main(int argc, char* argv[]){
+	if(argc < 2){
+		printf("give me a lexicon directory\n");
+		return 1;
+	}
+	LEXICON* lexicon = lexOpen(argv[1], "r");
+	denseRIV* intake;
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(argv[1]))){
+		printf("location not found, %s\n", argv[1]);
+		return 1;
+	}
+	int i=0;
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+
+		if(files->d_type == DT_DIR){
+			/* the lexicon should not have valid sub-directories */
+			continue;
+		}
+		
+		intake = lexPull(lexicon, files->d_name);
+		if(!intake){
+			/* nothing could be pulled for this entry; skip it */
+			continue;
+		}
+		
+		printf("%d,%d,%lf,%d,%s\n", intake->frequency, intake->contextSize, intake->magnitude, i, files->d_name);
+		
+		free(intake);
+		
+		i++;
+	}
+	closedir(directory);
+	lexClose(lexicon);
+
+	return 0;
+}
--- a/RIVlexcompare.c
+++ b/RIVlexcompare.c
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+
+#define RIVSIZE 25000
+#define CACHESIZE 0
+#define NONZEROS 2
+#define THRESHOLD 0.90
+#include "RIVtools.h"
+
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
+
+/* Compares the lexicon vectors named by the entries of the directory
+ * argv[1] against each other. Every still-flagged (boolean non-zero),
+ * non-zero-magnitude vector is compared with every other such vector;
+ * pairs whose cosine exceeds THRESHOLD are printed and the second vector
+ * is un-flagged.
+ * Returns 1 on a missing or over-long directory argument, 0 otherwise.
+ */
+int main(int argc, char *argv[]){
+	clock_t begintotal = clock();
+	int fileCount = 0;
+	char rootString[2000];
+	if(argc <2){ 
+		printf("give me a directory");
+		return 1;
+	}
+	/* rootString must hold argv[1], the trailing "/" and the terminator */
+	if(strlen(argv[1]) + 2 > sizeof(rootString)){
+		printf("directory path too long\n");
+		return 1;
+	}
+	int thing = 0;
+	strcpy(rootString, argv[1]);
+	strcat(rootString, "/");
+
+	lexOpen("/home/drbob/Documents/lexicon2-25");
+	/* starts as a one-element array; directoryToL2s grows it with realloc */
+	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
+	if(!fileRIVs){
+		printf("out of memory\n");
+		lexClose();
+		return 1;
+	}
+	directoryToL2s(rootString, &fileRIVs, &fileCount);
+	printf("fileCount: %d\n", fileCount);
+	
+	sparseRIV* fileRIVs_slider = fileRIVs;
+	sparseRIV* fileRIVs_stop = fileRIVs+fileCount;
+	while(fileRIVs_slider <fileRIVs_stop){
+		(*fileRIVs_slider).magnitude = getMagnitudeSparse(*fileRIVs_slider);
+		fileRIVs_slider++;
+		
+	}
+	
+	clock_t beginnsquared = clock();
+	float cosine;
+	denseRIV baseDense;
+	baseDense.values = malloc(RIVSIZE*sizeof(int));
+	if(!baseDense.values){
+		printf("out of memory\n");
+		free(fileRIVs);
+		lexClose();
+		return 1;
+	}
+	fileRIVs_slider = fileRIVs;
+	sparseRIV* comparators_slider;
+	int count = 0;
+	while(fileRIVs_slider<fileRIVs_stop){
+		/* skip un-flagged and zero-magnitude bases; the slider must advance
+		 * in both cases or the loop never ends */
+		if(!fileRIVs_slider->boolean || fileRIVs_slider->magnitude == 0){
+			fileRIVs_slider++;
+			continue;
+		}
+		comparators_slider = fileRIVs;
+		memset(baseDense.values, 0, RIVSIZE*sizeof(int));
+		baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
+		baseDense.magnitude =(*fileRIVs_slider).magnitude;
+		while(comparators_slider < fileRIVs_stop){
+			/* skip un-flagged vectors, the base itself (same name) and
+			 * zero-magnitude vectors, always advancing the slider */
+			if(!(comparators_slider->boolean&&strcmp(comparators_slider->name, fileRIVs_slider->name))
+			|| comparators_slider->magnitude==0){
+				comparators_slider++;
+				continue;
+			}
+			cosine = cosCompare(baseDense, *comparators_slider);
+			count++;
+			if(cosine>THRESHOLD){
+				printf("%s\t%s\n%f\n", fileRIVs_slider->name , comparators_slider->name, cosine);
+				comparators_slider->boolean = 0;
+				thing++;
+			}
+
+			comparators_slider++;
+		}
+
+		fileRIVs_slider++;
+
+	}
+	clock_t endnsquared = clock();
+	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
+	printf("\nnsquared time:%lf\n\n", time);
+	printf("\ncosines: %d \n", count);
+	printf("\nsims: %d \n", thing);
+	clock_t endtotal = clock();
+	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
+	printf("total time:%lf\n\n", time_spent);
+	free(baseDense.values);
+	free(fileRIVs);
+	lexClose();
+	
+return 0;
+}
+/* For every non-dot entry of the directory rootString, pulls the lexicon
+ * vector of the same name; vectors whose frequency exceeds 2000 are
+ * normalised and appended to the realloc-grown array *fileRIVs, and
+ * *fileCount is advanced. Does not recurse into sub-directories.
+ */
+void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
+
+	DIR *directory;
+	struct dirent *files = 0;
+	
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+	
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+		denseRIV temp = lexPull(files->d_name);
+		if(*temp.frequency >2000){
+			sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
+			if(!grown){
+				printf("out of memory while loading %s\n", files->d_name);
+				free(temp.values);
+				break;
+			}
+			(*fileRIVs) = grown;
+		
+			(*fileRIVs)[(*fileCount)] = normalize(temp, 500);
+			
+			strcpy((*fileRIVs)[(*fileCount)].name, files->d_name);
+			(*fileCount)++;
+		}
+		free(temp.values);
+	}
+	/* release the directory handle */
+	closedir(directory);
+}
+
+
--- a/RIVlexicon.h
+++ b/RIVlexicon.h
+#ifndef RIV_LEXICON_H
+#define RIV_LEXICON_H
+
+#include "RIVLower.h"
+#include "RIVaccessories.h"
+#include "assert.h"
+
+#ifndef READFLAG
+#define READFLAG 0x01
+#endif
+
+#ifndef WRITEFLAG
+#define WRITEFLAG 0x02
+#endif
+
+#ifndef INCFLAG 
+#define INCFLAG 0x04
+#endif
+
+#ifndef CACHEFLAG
+#define CACHEFLAG 0x08
+#endif
+
+#ifndef SORTCACHE
+	#ifndef HASHCACHE
+		#define SORTCACHE
+	#endif
+#endif
+/* LEXICON is the handle returned by lexOpen and consumed by lexPull,
+ * lexPush and lexClose */
+typedef struct{
+	/* directory path of the lexicon, copied in by lexOpen */
+	char lexName[100];
+	/* cache slots for this lexicon; set by lexOpen only when a cache is created */
+	denseRIV* *cache;
+	/* this lexicon's node in the global cacheList chain (see rootCache) */
+	struct cacheList* listPoint;
+	/* bitmask of READFLAG, WRITEFLAG, INCFLAG and CACHEFLAG */
+	char flags;
+	#ifdef SORTCACHE
+	/* root of the search tree used to look cached words up by name */
+	struct treenode* treeRoot;
+	#endif /* SORTCACHE */
+}LEXICON;
+/* doubly linked list of every open lexicon cache; rootCache is the head.
+ * signalSecure walks this list to dump all caches before the process dies */
+struct cacheList{
+	/* the cache slot array shared with the owning LEXICON */
+	denseRIV* *cache;
+	struct cacheList* next;
+	struct cacheList* prev;
+}*rootCache = NULL;
+
+/* number of int-sized slots taken up by the sparseRIV metadata fields
+ * (count, frequency, contextSize, magnitude).  the whole expansion is
+ * parenthesised so the division cannot re-associate with neighbouring
+ * operators at the point of use */
+#define IODISPLACEMENT  ((sizeof(((sparseRIV*)0)->count)\
+						+ sizeof(((sparseRIV*)0)->frequency)\
+						+ sizeof(((sparseRIV*)0)->contextSize)\
+						+ sizeof(((sparseRIV*)0)->magnitude))\
+						/ sizeof(int))
+/* staging area for file IO, placed RIVSIZE ints into the shared temp block */
+int* IOstagingSlot = RIVKey.h_tempBlock+RIVSIZE; //#TODO format this better
+
+
+/* lexOpen is called to "open the lexicon", setting up for later calls to
+ * lexPush and lexPull. if the lexicon has not been opened before calls
+ * to these functions, their behavior can be unpredictable, most likely crashing
+ */
+LEXICON* lexOpen(const char* lexName, const char* flags);
+
+/* lexClose should always be called after the last lex push or lex pull call
+ * if the lexicon is left open, some vector data may be lost due to 
+ * un-flushed RIV cache
+ */
+void lexClose(LEXICON*);
+
+
+/* both lexPush and lexPull must be called *after* the lexOpen() function
+ * and after using them the lexClose() function must be called to ensure
+ * data security */
+ 
+/* lexPush writes a denseRIV to the lexicon for permanent storage */
+int lexPush(LEXICON* lexicon, denseRIV* RIVout);
+
+/* cacheCheckOnPush tests the state of this vector in our lexicon cache
+ * and returns 1 on "success" indicating cache storage and no need to push to file
+ * or returns 0 on "failure" indicating that the vector need be pushed to file 
+ */
+int cacheCheckOnPush(LEXICON* lexicon, denseRIV* RIVout);
+
+/* lexPull reads a denseRIV from the lexicon, under "word"
+ * if the file does not exist, it creates a 0 vector with the name of word
+ * lexPull returns a denseRIV *pointer* because its data must be tracked 
+ * globally for key optimizations
+ */
+denseRIV* lexPull(LEXICON* lexicon, char* word);
+
+/* cacheCheckonPull checks if the word's vector is stored in cache,
+ * and returns a pointer to that vector on success
+ * or returns a NULL pointer if the word is not cached, indicating a need 
+ * to pull from file
+ */
+denseRIV* cacheCheckOnPull(LEXICON* lexicon, char* word);
+
+/* fLexPush pushes the data contained in a denseRIV out to a lexicon file,
+ * saving it for long-term aggregation.  function is called by "lexPush",
+ * which is what users should actually use.  lexPush, unlike fLexPush,
+ * has cache logic under the hood for speed and harddrive optimization
+ */
+int fLexPush(LEXICON* lexicon, denseRIV* RIVout);
+
+/* fLexPull pulls data directly from a file and converts it (if necessary)
+ * to a denseRIV.  function is called by "lexPull" which is what users 
+ * should actually use.  lexPull, unlike fLexPull, has cache logic under
+ * the hood for speed and harddrive optimization 
+ */
+denseRIV* fLexPull(FILE* lexWord);
+
+/* redefines signal behavior to protect cached data against seg-faults etc*/
+void signalSecure(int signum, siginfo_t *si, void* arg);
+
+/* used exclusively by flexpush to determine write-style (sparse or dense)
+ * and also formats the "IOstagingSlot" for fwrite as a single block if sparse
+ */
+int saturationForStaging(denseRIV* output);
+/* begin definitions */
+/* lexOpen creates a LEXICON handle for the directory lexName.
+ * flags is a string searched for the letters:
+ *   'w' - write: the directory is created if it does not exist
+ *   'r' - read: without 'w', a missing directory makes lexOpen return NULL
+ *   'x' - exclusive: without it the INCFLAG (inclusive) bit is set
+ * when CACHESIZE > 0 and both 'r' and 'w' are given, a cache is attached
+ * and signal handlers are installed to dump it on fatal signals.
+ * returns NULL on bad arguments, an over-long name, or allocation failure
+ */
+LEXICON* lexOpen(const char* lexName, const char* flags){
+	if(!lexName || !flags){
+		return NULL;
+	}
+	/* the name must fit the fixed-size lexName field, terminator included */
+	if(strlen(lexName) >= sizeof(((LEXICON*)0)->lexName)){
+		return NULL;
+	}
+	LEXICON* output = calloc(1, sizeof(LEXICON));
+	if(!output){
+		return NULL;
+	}
+	/* identify the presence of read, write, and exclusive flags */
+	char* r = strstr(flags, "r");
+	char* w = strstr(flags, "w");
+	char* x = strstr(flags, "x");
+	struct stat st = {0};
+
+	if(w){
+		/* if set to write, we check and create if necessary, the lexicon */
+		if (stat(lexName, &st) == -1) {
+			mkdir(lexName, 0777);
+		}
+		output->flags |= WRITEFLAG;
+	}else if(r){
+		/* if set to read and not write, return null if lexicon does not exist */
+		if (stat(lexName, &st) == -1) {
+			free(output);
+			return NULL;
+		}
+		output->flags |= READFLAG;
+	}
+	/* if not set to exclusive, set the inclusive flag */
+	if(!x){
+		output->flags |= INCFLAG;
+	}
+	strcpy(output->lexName, lexName);
+
+	#if CACHESIZE > 0
+
+	if(r && w){
+		//#TODO include hash vs sort cache logic flags
+		/* if we will be reading and writing the same lexicon, setup a
+		 * cache for this lexicon to speed up rewrites */
+		struct cacheList* newCache = calloc(1, sizeof(struct cacheList));
+		if(!newCache){
+			free(output);
+			return NULL;
+		}
+		#ifdef HASHCACHE
+		newCache->cache = calloc(CACHESIZE, sizeof(denseRIV*));
+		#else
+		#ifdef SORTCACHE
+		/* one spare slot holds the entry pushed off the end during insertion */
+		newCache->cache = calloc(CACHESIZE+1, sizeof(denseRIV*));
+		output->treeRoot = calloc(1, sizeof(struct treenode));
+		#endif
+		#endif
+		if(!newCache->cache){
+			#ifdef SORTCACHE
+			free(output->treeRoot);
+			#endif
+			free(newCache);
+			free(output);
+			return NULL;
+		}
+		output->flags |= CACHEFLAG;
+
+		/* link the new cache in at the head of the global list */
+		output->cache = newCache->cache;
+		newCache->next = rootCache;
+		if(rootCache){
+			rootCache->prev = newCache;
+		}
+		rootCache = newCache;
+		output->listPoint = newCache;
+
+		struct sigaction action = {0};
+		action.sa_sigaction = signalSecure;
+		action.sa_flags = SA_SIGINFO;
+
+		/* install the cache-dumping handler on signals 1 through 26 */
+		for(int i=1; i<27; i++){
+			sigaction(i,&action,NULL);
+		}
+	}
+	#endif
+
+	return output;
+}
+/* lexClose flushes the lexicon's cache (if it has one) to file, unlinks
+ * the cache from the global cacheList chain and frees the handle.
+ * passing NULL is a no-op
+ */
+void lexClose(LEXICON* toClose){
+	if(!toClose){
+		return;
+	}
+#if CACHESIZE>0 
+	if(toClose->flags & CACHEFLAG){
+		if(cacheDump(toClose->cache)){
+			puts("cache dump failed, some lexicon data was lost");
+		}
+		struct cacheList* listPoint = toClose->listPoint;
+		if(listPoint->prev){
+			listPoint->prev->next = listPoint->next;
+		}else if(rootCache == listPoint){
+			/* closing the head node: move rootCache on, otherwise
+			 * signalSecure would later walk freed memory */
+			rootCache = listPoint->next;
+		}
+		if(listPoint->next){
+			listPoint->next->prev = listPoint->prev;
+		}
+		free(listPoint);
+	}
+#endif
+	free(toClose);
+}
+
+
+
+#if CACHESIZE > 0
+/* cacheCheckOnPull looks for word in the lexicon's cache.
+ * returns the cached vector when present, NULL when the caller has to
+ * pull the word from file instead
+ */
+denseRIV* cacheCheckOnPull(LEXICON* lexicon, char* word){
+	#ifdef HASHCACHE
+	/* the slot is chosen by seeding rand() with the word */
+	srand(wordtoSeed(word));
+	denseRIV* candidate = lexicon->cache[rand()%CACHESIZE];
+	if(candidate && !strcmp(word, candidate->name)){
+		/* slot is occupied by this very word */
+		return candidate;
+	}
+	return NULL;
+	#endif
+	#ifdef SORTCACHE
+	/* sorted cache: lookup goes through the name tree */
+	return treeSearch(lexicon->treeRoot, word);
+	#endif
+}
+
+/* cacheCheckOnPush decides whether RIVout is kept in the lexicon's cache
+ * instead of being written to file.
+ * returns 1 when the vector is (now) held in cache, so no file push is needed
+ * returns 0 when the caller must push the vector to file itself
+ */
+int cacheCheckOnPush(LEXICON* lexicon, denseRIV* RIVout){
+
+	/* if our RIV was cached already, no need to play with it */
+	if(RIVout->cached == lexicon){
+		return 1;
+	}
+	#ifdef HASHCACHE
+	srand(wordtoSeed(RIVout->name));
+	int hash = rand()%CACHESIZE;
+
+	/* if there is no word in this cache slot */
+	if(!lexicon->cache[hash]){
+		/* push to cache instead of file */
+		lexicon->cache[hash] = RIVout;
+		lexicon->cache[hash]->cached = lexicon;
+		return 1;
+	}
+	/*if the current RIV is more frequent than the RIV holding its slot */
+	if(RIVout->frequency > lexicon->cache[hash]->frequency ){
+		/* push the lower frequency cache entry to a file */
+		fLexPush(lexicon, lexicon->cache[hash]);
+		/* replace this cache-slot with the current vector */
+
+		lexicon->cache[hash] = RIVout;
+		lexicon->cache[hash]->cached = lexicon;
+
+		return 1;
+	}
+	return 0;
+	#endif /* HASHCACHE */
+	#ifdef SORTCACHE
+	/* the cache is kept ordered by descending frequency and is
+	 * NULL-terminated; slot CACHESIZE is a spare for the overflow entry */
+	denseRIV* *cache_slider = lexicon->cache;
+	while(*cache_slider){
+		if(RIVout->frequency > (*cache_slider)->frequency){
+			/* shift the tail down one slot.  the ranges overlap, so this
+			 * must be memmove, and the length is in bytes, not elements */
+			memmove(cache_slider+1, cache_slider,
+				(CACHESIZE-(cache_slider-lexicon->cache))*sizeof(denseRIV*));
+			denseRIV* evicted = lexicon->cache[CACHESIZE];
+			if(evicted){
+				/* remove the evicted word (not the incoming one) from the
+				 * tree, and do it before fLexPush frees the vector */
+				treecut(lexicon->treeRoot, evicted->name);
+				fLexPush(lexicon, evicted);
+				lexicon->cache[CACHESIZE] = NULL;
+			}
+			RIVout->cached = lexicon;
+			*cache_slider = RIVout;
+			//add tree element
+			RIVinsert(lexicon->treeRoot, RIVout->name, RIVout);
+
+			return 1;
+		}
+
+		cache_slider++;
+	}
+	/* lower frequency than everything cached: append if there is room */
+	if(cache_slider-lexicon->cache < CACHESIZE){
+		RIVout->cached = lexicon;
+		*cache_slider = RIVout;
+		//add tree element
+		RIVinsert(lexicon->treeRoot, RIVout->name, RIVout);
+		return 1;
+	}
+	return 0;
+	#endif /* SORTCACHE */
+}
+
+#endif
+/* lexPull fetches the vector stored under word.
+ * order of lookup: the cache (when the lexicon has one), then the file
+ * "<lexName>/<word>".  when no file exists, an inclusive lexicon yields a
+ * freshly zeroed vector named after word, an exclusive one yields NULL.
+ * NULL is also returned when the path does not fit, the file cannot be
+ * read, or memory runs out
+ */
+denseRIV* lexPull(LEXICON* lexicon, char* word){
+
+	denseRIV* output = NULL;
+
+	#if CACHESIZE > 0
+	if(lexicon->flags & CACHEFLAG){
+		/* if there is a cache, first check if the word is cached */
+		if((output = cacheCheckOnPull(lexicon, word))){
+			return output;
+		}
+	}
+	#endif /* CACHESIZE > 0 */
+
+	/* if not, attempt to pull the word data from lexicon file */
+	char pathString[200];
+
+	int pathLength = snprintf(pathString, sizeof(pathString), "%s/%s", lexicon->lexName, word);
+	if(pathLength < 0 || (size_t)pathLength >= sizeof(pathString)){
+		/* a truncated path would name the wrong file */
+		return NULL;
+	}
+
+	FILE *lexWord = fopen(pathString, "rb");
+
+	/* if this lexicon file already exists */
+	if(lexWord){
+		/* pull data from file; fLexPull reports failure with NULL */
+		output = fLexPull(lexWord);
+		if(output){
+			strcpy(output->name, word);
+		}
+		fclose(lexWord);
+	}else if(lexicon->flags & INCFLAG){
+		/* inclusive lexicon: a missing file means the word is new,
+		 * so hand back a 0 vector */
+		output = calloc(1, sizeof(denseRIV));
+		if(output){
+			strcpy(output->name, word);
+		}
+	}
+	/* exclusive lexicon and no file: output is still NULL */
+
+	return output;
+}
+
+/* lexPush stores RIVout in the lexicon: in the cache when the lexicon has
+ * one and the cache accepts the vector, otherwise in its file.
+ * returns 0 when the vector was taken by the cache, else fLexPush's result
+ */
+int lexPush(LEXICON* lexicon, denseRIV* RIVout){
+	#if CACHESIZE > 0
+	/* a return of 1 from the cache check means it has been dealt with in cache */
+	if((lexicon->flags & CACHEFLAG) && cacheCheckOnPush(lexicon, RIVout)){
+		return 0;
+	}
+	#endif
+
+	/* not cached: write straight through to the lexicon file */
+	return fLexPush(lexicon, RIVout);
+}
+
+/* saturationForStaging lays output out in IOstagingSlot in sparse form:
+ * [count][0][frequency][contextSize][magnitude][locations...][values...]
+ * and returns the number of non-zero values found.  values are gathered
+ * in the RIVSIZE ints preceding IOstagingSlot and then copied to sit
+ * directly after the locations
+ */
+int saturationForStaging(denseRIV* output){
+
+	int* header = IOstagingSlot;
+	header[0] = 0;
+	header[1] = 0;
+	header[2] = output->frequency;
+	header[3] = output->contextSize;
+	*(float*)(header+4) = output->magnitude;
+
+	int* locations = IOstagingSlot+5;
+	int* values = IOstagingSlot-RIVSIZE;
+	int found = 0;
+	for(int i=0; i<RIVSIZE; i++){
+		int value = output->values[i];
+		/* zeros carry no information in sparse form */
+		if(!value){
+			continue;
+		}
+		locations[found] = i;
+		values[found] = value;
+		found++;
+	}
+	/* first header slot carries the size of the sparse vector */
+	header[0] = found;
+
+	/* move the values into the slot immediately after the locations */
+	memcpy(locations+found, values, found*sizeof(int));
+
+	return found;
+}
+/* fLexPush writes output to the file "<lexName>/<name>".
+ * when fewer than half the values are non-zero the staged sparse form is
+ * written, otherwise the struct is written raw starting at its "cached"
+ * field (RIVSIZE+5 ints).
+ * returns 0 on success, in which case output has been freed
+ * returns 1 on failure (path too long, open or write error); output is
+ * then left untouched for the caller
+ */
+int fLexPush(LEXICON* lexicon, denseRIV* output){	
+	char pathString[200] = {0};
+
+	/* word data will be placed in a (new?) file under the lexicon directory
+	 * in a file named after the word itself */
+	int pathLength = snprintf(pathString, sizeof(pathString), "%s/%s", lexicon->lexName, output->name);
+	if(pathLength < 0 || (size_t)pathLength >= sizeof(pathString)){
+		printf("lexicon push has failed for word: %s\nconsider cleaning inputs", output->name);
+		return 1;
+	}
+
+	int saturation = saturationForStaging(output);
+	int sparse = saturation < RIVSIZE/2;
+
+	if(!sparse){
+		/* the leading "cached" field doubles as the dense marker (0) on disk */
+		output->cached = 0;
+	}
+
+	FILE *lexWord = fopen(pathString, "wb");
+	if(!lexWord){
+		printf("lexicon push has failed for word: %s\nconsider cleaning inputs", output->name);
+		return 1;
+	}
+
+	size_t expected;
+	size_t written;
+	if(sparse){
+		expected = (size_t)saturation*2+5;
+		written = fwrite(IOstagingSlot, sizeof(int), expected, lexWord);
+	}else{
+		expected = RIVSIZE+5;
+		written = fwrite(((int*)&output->cached), sizeof(int), expected, lexWord);
+	}
+
+	/* fclose can report a deferred write error, so check it too */
+	int closeFailed = fclose(lexWord);
+	if(written != expected || closeFailed){
+		printf("lexicon push has failed for word: %s\nconsider cleaning inputs", output->name);
+		return 1;
+	}
+
+	free(output);
+
+	return 0;
+}
+
+/* fLexPull reads one vector from an open lexicon file and returns it as
+ * a heap allocated denseRIV (name is left for the caller to fill in).
+ * the first size_t in the file is the value count for a sparse record and
+ * 0 for a dense record; frequency, contextSize and magnitude follow.
+ * returns NULL on allocation failure, an empty or implausible file, or a
+ * short read of the vector data
+ * NOTE(review): the dense branch assumes the RIVSIZE values follow the
+ * metadata directly, as written by fLexPush - confirm against denseRIV layout
+ */
+denseRIV* fLexPull(FILE* lexWord){
+	denseRIV *output = calloc(1,sizeof(denseRIV));
+	if(!output){
+		return NULL;
+	}
+	size_t typeCheck;
+	/* get metadata for vector */
+	if(fread(&typeCheck, sizeof(size_t), 1, lexWord) != 1){
+		free(output);
+		return NULL;
+	}
+	/* metadata is stored identically for both record types; a short read
+	 * here leaves the zeroes from calloc in place */
+	size_t metadataRead = 0;
+	metadataRead += fread(&output->frequency, sizeof(int), 1, lexWord);
+	metadataRead += fread(&output->contextSize, sizeof(unsigned int), 1, lexWord);
+	metadataRead += fread(&output->magnitude, sizeof(float), 1, lexWord);
+	(void)metadataRead;
+
+	/* first value stored is the value count if sparse, and 0 if dense */
+	if (typeCheck){
+		/* a count beyond RIVSIZE cannot come from a valid record */
+		if(typeCheck > RIVSIZE){
+			printf("vector read failure");
+			free(output);
+			return NULL;
+		}
+		/* pull as sparseVector: locations then values, in one buffer */
+		sparseRIV temp;
+		temp.count = typeCheck;
+		temp.locations = malloc(temp.count*2*sizeof(int));
+		if(!temp.locations){
+			free(output);
+			return NULL;
+		}
+		temp.values = temp.locations+temp.count;
+		if(fread(temp.locations, sizeof(int), typeCheck*2, lexWord) != typeCheck*2){
+			/* adding half-read data would index with garbage locations */
+			printf("vector read failure");
+			free(temp.locations);
+			free(output);
+			return NULL;
+		}
+
+		addS2D(output->values, temp);
+		/* the sparse scratch copy is no longer needed */
+		free(temp.locations);
+	}else{
+		/* typecheck is thrown away, just a flag in this case;
+		 * the dense values follow the metadata */
+		if(fread(output->values, sizeof(int), RIVSIZE, lexWord) != RIVSIZE){
+			printf("vector read failure");
+			free(output);
+			return NULL;
+		}
+	}
+
+	output->cached = 0;
+
+	return output;
+
+}
+
+/* signalSecure is the handler installed by lexOpen: it dumps every cache
+ * in the rootCache chain to file, then restores the default disposition
+ * for signum and re-raises it against this process
+ */
+void signalSecure(int signum, siginfo_t *si, void* arg){
+	for(; rootCache; rootCache = rootCache->next){
+		if(cacheDump(rootCache->cache)){
+			puts("cache dump failed, some lexicon data lost");
+		}
+	}
+	/* hand the signal back to its default behaviour */
+	signal(signum, SIG_DFL);
+	kill(getpid(), signum);
+}
+/* cacheDump pushes every vector held in the cache array toDump to its
+ * lexicon file through fLexPush, then frees the array itself.
+ * returns the sum of the fLexPush results (0 when every push succeeded)
+ */
+int cacheDump(denseRIV* *toDump){
+
+	int failures = 0;
+	#ifdef HASHCACHE
+	/* hashed cache: any of the CACHESIZE slots may be empty */
+	for(int slot=0; slot<CACHESIZE; slot++){
+		denseRIV* entry = toDump[slot];
+		if(entry){
+			failures += fLexPush((LEXICON*)entry->cached, entry);
+		}
+	}
+	#else
+	#ifdef SORTCACHE
+	/* sorted cache: entries are packed, the first NULL ends the list */
+	for(denseRIV* *entry = toDump; *entry; entry++){
+		failures += fLexPush((LEXICON*)(*entry)->cached, *entry);
+	}
+	#endif
+	#endif
+	free(toDump);
+
+	return failures;
+}
+#endif
--- a/RIVread.c
+++ b/RIVread.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <error.h>
+#include <string.h>
+//#define HASHCACHE
+#define RIVSIZE 50000
+#define NONZEROS 4
+#define CACHESIZE 27000
+#include "RIVtools.h"
+
+//this program reads a directory full of files, and adds all context vectors (considering file as context)
+//to all words found in these files. this is used to create a lexicon, or add to an existing one
+
+void fileGrind(FILE* textFile);
+void addContext(denseRIV* lexRIV, sparseRIV context);
+void directoryGrind(char *rootString);
+void lineGrind(char* textLine);
+LEXICON* lp;
+//int COUNTY = 0;
+/* entry point: argv[1] names a directory of .txt files whose contents are
+ * ground into the lexicon stored in the directory "lexicon".
+ * returns 0 on success and 1 on a usage or setup error
+ */
+int main(int argc, char *argv[]){
+
+	char pathString[1000];
+
+	if(argc < 2){
+		printf("give me a directory");
+		return 1;
+	}
+
+	//we format the root directory, preparing to scan its contents
+	int pathLength = snprintf(pathString, sizeof(pathString), "%s/", argv[1]);
+	if(pathLength < 0 || (size_t)pathLength >= sizeof(pathString)){
+		printf("directory path is too long");
+		return 1;
+	}
+
+	//ensure that the targeted root directory exists
+	struct stat st;
+	if(stat(pathString, &st) == -1) {
+		printf("directory doesn't seem to exist");
+		return 1;
+	}
+
+	//we open the lexicon, if it does not yet exist, it will be created
+	lp = lexOpen("lexicon", "rw");
+	if(!lp){
+		printf("lexicon could not be opened");
+		return 1;
+	}
+
+	//we will scan the directory, adding all data to our lexicon, as seen inside
+	directoryGrind(pathString);
+
+	//we close the lexicon again, ensuring all data is secured
+	lexClose(lp);
+	return 0;
+}
+
+//mostly a standard Dirent-walk
+/* directoryGrind runs fileGrind over every ".txt" file directly inside
+ * rootString.  sub-directories are ignored and every other entry is
+ * reported as skipped
+ */
+void directoryGrind(char *rootString){
+/* *** begin Dirent walk *** */
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+
+		if(files->d_type == DT_DIR){
+			continue;
+		}
+
+		//check for non-txt files; testing the entry name itself means a
+		//short name can never index in front of the buffer
+		size_t nameLength = strlen(files->d_name);
+		if(nameLength < 4 || strcmp(files->d_name+nameLength-4, ".txt")){
+			printf("skipped: %s\n", files->d_name); 
+			continue;
+		}
+
+		int pathLength = snprintf(pathString, sizeof(pathString), "%s/%s", rootString, files->d_name);
+		if(pathLength < 0 || (size_t)pathLength >= sizeof(pathString)){
+			//path does not fit: opening a truncated name would hit the wrong file
+			printf("skipped: %s\n", files->d_name); 
+			continue;
+		}
+/* *** end dirent walk, begin meat of function  *** */
+
+		//open a file within root directory
+		FILE *input = fopen(pathString, "r");
+		if(input){
+			//process this file and add its data to lexicon
+			fileGrind(input);
+
+			fclose(input);
+		}
+	}
+	closedir(directory);
+}
+
+
+/* fileGrind feeds textFile to lineGrind one line at a time; each line is
+ * treated as one context set
+ */
+void fileGrind(FILE* textFile){
+	char textLine[10000];
+	// included python script separates paragraphs into lines
+	// fgets returning NULL is the only end condition: testing feof after a
+	// successful read would discard a final line that has no newline
+	while(fgets(textLine, sizeof(textLine), textFile)){
+		if(!strlen(textLine)) continue;
+
+		//process each line as a context set
+		lineGrind(textLine);
+	}
+}
+//form context vector from contents of text, then add that vector to
+//all lexicon entries of the words contained
+//form context vector from contents of text, then add that vector to
+//all lexicon entries of the words contained.
+//lines whose context holds one word or fewer are ignored
+void lineGrind(char* textLine){
+	//extract a context vector from this text set
+	sparseRIV contextVector = textToL2(textLine);
+	if(contextVector.contextSize <= 1){
+		free(contextVector.locations);
+		return;
+	}
+
+	denseRIV* lexiconRIV;
+	int displacement = 0;
+	char word[100] = {0};
+	//sscanf returns 1 only when a word was really read; on trailing
+	//whitespace it fails and leaves word/displacement stale, which used to
+	//make the previous word be counted a second time
+	while(sscanf(textLine, "%99s%n", word, &displacement) == 1){
+		//step over exactly what was consumed, never past the terminator
+		textLine += displacement;
+
+		//we ensure that each word is free of unwanted characters
+		if(!isWordClean((char*)word)){
+			continue;
+		}
+
+		//we pull the vector corresponding to each word from the lexicon
+		//if it's a new word, lexPull returns a 0 vector
+		lexiconRIV= lexPull(lp, word);
+		if(!lexiconRIV){
+			//nothing to update when the lexicon cannot supply this word
+			continue;
+		}
+
+		//we add the context of this file to this wordVector
+		addContext(lexiconRIV, contextVector);
+
+		//we remove the sub-vector corresponding to the word itself
+		subtractThisWord(lexiconRIV);
+
+		//we log that this word has been encountered one more time
+		lexiconRIV->frequency += 1;
+
+		//and finally we push it back to the lexicon for permanent storage
+		lexPush(lp, lexiconRIV);
+	}
+	//free the heap allocated context vector data
+	free(contextVector.locations);
+}
+
+//fold one context vector into a lexicon vector
+void addContext(denseRIV* lexRIV, sparseRIV context){
+	//record how many words the added context represented; not strictly
+	//needed, but useful metadata for some analyses
+	lexRIV->contextSize += context.contextSize;
+
+	//sparse-into-dense addition of the context values
+	addS2D(lexRIV->values, context);
+}
+
+
+	
--- a/RIVtools.h
+++ b/RIVtools.h
+#ifndef RIVTOOLS_H_
+#define RIVTOOLS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "RIVLower.h"
+#include "RIVaccessories.h"
+#include "RIVlexicon.h"
+
+
+/* fileToL2 takes an input file, reads words (delimiting on " " and "\n") 
+ * and returns a sparse RIV which is the vector sum of the base RIVs of each 
+ * word contained
+ */
+sparseRIV fileToL2(FILE *input);
+
+/* fileToL2Clean operates the same as fileToL2 but keeps only words 
+ * containing lowercase letters and the '_' symbol
+ * this is important if you will be lexPush-ing those words later
+ */
+sparseRIV fileToL2Clean(FILE *data);
+
+/* like fileToL2 but takes a block of text */
+sparseRIV textToL2(char *text);
+
+/*cosine determines the "similarity" between two RIVs. */
+double cosCompare(denseRIV baseRIV, sparseRIV comparator);
+
+/*used for analysis of lexicon vectors (not simply accumulation)
+ * to avoid overflow of even a 64 bit integer, vectors must be normalized
+ * this is an experimental approximation of true normal, which should yield 
+ * some extra data about the nature of this word's context
+ */
+sparseRIV normalize(denseRIV input, int factor);
+
+
+
+/* calculates the magnitude of a sparseVector */ //TODO contain integer overflow in square process
+double getMagnitudeSparse(sparseRIV input);
+/* same for denseVector */
+double getMagnitudeDense(denseRIV *input); //TODO consolidate these into one function
+	
+/* textToL2 builds the context vector of a block of text: the sum of the
+ * base RIVs of every whitespace delimited word in it.
+ * contextSize of the result is the number of words read
+ */
+sparseRIV textToL2(char *text){
+	int wordCount = 0;
+	char word[100] = {0};
+
+	int denseTemp[RIVSIZE] = {0};
+	/* locations (implicit RIV) are temp stored in temp block, and moved 
+	 * to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	int locationCount = 0;
+	int displacement = 0;
+
+	/* sscanf yields 1 only when a word was actually read; on a failed
+	 * read word and displacement keep their previous contents, so the
+	 * result has to be checked before either is used */
+	while(sscanf(text, "%99s%n", word, &displacement) == 1){
+		/* advance by what was consumed; the next %s skips the
+		 * separating whitespace itself */
+		text += displacement;
+
+		/* if this word would overflow the locations block, map it to the denseVector */
+		if((locationCount+NONZEROS)>TEMPSIZE){
+			addI2D(denseTemp, locations, locationCount);
+			locationCount = 0;
+		}
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+		wordCount++;
+	}
+	/* map remaining locations to the denseTemp */
+	addI2D(denseTemp, locations, locationCount);
+	sparseRIV output = consolidateD2S(denseTemp);
+
+	/* contextSize stores the number of words read */
+	output.contextSize = wordCount;
+	return output;
+}
+
+/* fileToL2 reads every whitespace delimited word of data and returns the
+ * sum of their base RIVs as a sparse vector.  contextSize of the result
+ * is the number of words read.  the file is rewound before returning
+ */
+sparseRIV fileToL2(FILE *data){
+	char word[100] = {0};
+
+	/* locations (implicit RIV) are temporarily stored in temp block, 
+	 * and moved to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	int locationCount = 0;
+	int denseTemp[RIVSIZE] = {0};
+	int wordCount = 0;
+	/* fscanf returns EOF (non-zero) at end of file, so compare against 1.
+	 * no feof test inside the loop: a final word that ends exactly at end
+	 * of file is read successfully and must still be counted */
+	while(fscanf(data, "%99s", word) == 1){
+
+		/* if this word would overflow the locations block, map it to the denseVector */
+		if((locationCount+NONZEROS)>TEMPSIZE){
+			addI2D(denseTemp, locations, locationCount);
+			locationCount = 0;
+		}
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+		wordCount++;
+	}
+	/* map remaining locations to the denseTemp */
+	addI2D(denseTemp, locations, locationCount);
+	sparseRIV output = consolidateD2S(denseTemp);
+
+	/* contextSize records the number of words in this file */
+	output.contextSize = wordCount;
+	fseek(data, 0, SEEK_SET);
+	return output;
+}
+
+/* fileToL2Clean works like fileToL2 but only words accepted by
+ * isWordClean contribute.  contextSize of the result is the number of
+ * clean words read.  the file is rewound before returning
+ */
+sparseRIV fileToL2Clean(FILE *data){
+
+	int denseTemp[RIVSIZE] = {0};
+	char word[100] = {0};
+	int *locations = RIVKey.h_tempBlock;
+	unsigned int wordCount = 0;
+
+	int locationCount = 0;
+	/* compare against 1: fscanf returns EOF (non-zero) at end of file, and
+	 * a last word ending exactly at end of file must not be dropped */
+	while(fscanf(data, "%99s", word) == 1){
+
+		/* if the word is not clean, skip it */
+		if(!isWordClean((char*)word)){
+			continue;
+		}
+		/* if this word would overflow the locations block, map it to the denseVector */
+		if((locationCount+NONZEROS)>TEMPSIZE){
+			addI2D(denseTemp, locations, locationCount);
+			locationCount = 0;
+		}
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+		wordCount++;
+	}
+	/* map remaining locations to the denseTemp */
+	addI2D(denseTemp, locations, locationCount);
+	sparseRIV output = consolidateD2S(denseTemp);
+
+	/* contextSize records the number of words used.  locationCount cannot
+	 * serve here because it restarts from 0 whenever the block is flushed */
+	output.contextSize = wordCount;
+	fseek(data, 0, SEEK_SET);
+	return output;
+}
+
+/* cosCompare returns the cosine similarity of a dense and a sparse RIV:
+ * their dot product divided by the product of their stored magnitudes.
+ * neither magnitude is checked for 0 here
+ */
+double cosCompare(denseRIV baseRIV, sparseRIV comparator){
+
+	long long int dot = 0;
+	int* locations_stop = comparator.locations+comparator.count;
+	int* locations_slider = comparator.locations;
+	int* values_slider = comparator.values;
+	while(locations_slider<locations_stop){
+
+		/* we calculate the dot-product to derive the cosine 
+		 * comparing sparse to dense by index.  widen before multiplying:
+		 * the product of two ints can overflow an int */
+		dot += (long long int)*values_slider * baseRIV.values[*locations_slider];
+		locations_slider++;
+		values_slider++;
+
+	}
+	/*dot divided by product of magnitudes */
+
+	return dot/(baseRIV.magnitude*comparator.magnitude);
+}
+
+/* getMagnitudeSparse returns the euclidean length of a sparse vector:
+ * the square root of the sum of the squares of its values
+ */
+double getMagnitudeSparse(sparseRIV input){
+	size_t temp = 0;
+	int *values = input.values;
+	int *values_stop = values+input.count;
+	while(values<values_stop){
+		/* we sum the squares of all elements; the square is taken in
+		 * 64 bits because int*int overflows for values beyond 46340 */
+		long long int value = *values;
+		temp += (size_t)(value*value);
+		values++;
+	}
+	/* we take the root of that sum */
+	return sqrt(temp);
+}
+
+/* getMagnitudeDense returns the euclidean length of a dense vector,
+ * taken over all RIVSIZE values
+ */
+double getMagnitudeDense(denseRIV *input){
+	size_t temp = 0;
+	int *values = input->values;
+	int *values_stop = values+RIVSIZE;
+	while(values<values_stop){
+		if(*values){
+			/* square in 64 bits: int*int overflows for values beyond 46340 */
+			long long int value = *values;
+			temp += (size_t)(value*value);
+		}
+		values++;
+	}
+	return sqrt(temp);
+}
+
+
+
+/* normalize scales every value of input by factor/contextSize, rounds,
+ * drops whatever rounds to 0 and returns the remainder as a sparse vector.
+ * name, contextSize and frequency are carried over; magnitude is
+ * recomputed from the scaled values
+ */
+sparseRIV normalize(denseRIV input, int factor){
+	/* scaling factor that brings the vector to the requested size */
+	float multiplier = (float)factor/(input.contextSize);
+
+	/* scratch space inside the temp block: locations first, values after */
+	int* scratchLocations = RIVKey.h_tempBlock+RIVSIZE;
+	int* scratchValues = scratchLocations+RIVSIZE;
+
+	int kept = 0;
+	for(int i=0; i<RIVSIZE; i++){
+		if(input.values[i]){
+			/* tentatively record this point ... */
+			scratchLocations[kept] = i;
+			scratchValues[kept]= round(input.values[i]*multiplier);
+
+			/* ... and keep it only if it did not round down to 0 */
+			if(scratchValues[kept]){
+				kept++;
+			}
+		}
+	}
+
+	sparseRIV output;
+	output.count = kept;
+	/* one allocation holds both arrays, values directly after locations */
+	output.locations = (int*) malloc(kept*2*sizeof(int));
+	output.values = output.locations+kept;
+
+	/* move the data out of the temp block into its permanent home */
+	memcpy(output.locations, scratchLocations, kept*sizeof(int));
+	memcpy(output.values, scratchValues, kept*sizeof(int));
+
+	/* carry metadata */
+	strcpy(output.name, input.name);
+	output.magnitude = getMagnitudeSparse(output);
+	output.contextSize = input.contextSize;
+	output.frequency = input.frequency;
+	return output;
+}
+
+#endif
--- a/RIVtoolsCPUlinux.h
+++ b/RIVtoolsCPUlinux.h
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "RIVLower.h"
+#include "RIVaccessories.h"
+
+
+
+/* lexPush writes a denseRIV to a file for permanent storage */
+int lexPush(denseRIV RIVout);
+/* lexPull reads an existing lexicon entry (under directory "lexicon")
+ * and creates a denseRIV with those attributes.
+ * if the file does not exist, it creates a 0 vector with the name of word
+ */
+denseRIV lexPull(char* word);
+
+/* fileToL2 takes an input file, reads words (delimiting on " " and "\n") 
+ * and returns a sparse RIV which is the vector sum of the base RIVs of each 
+ * word contained
+ */
+sparseRIV fileToL2(FILE *input);
+
+/* fileToL2Clean operates the same as fileToL2 but keeps only words 
+ * containing lowercase letters and the '_' symbol
+ * this is important if you will be lexPush-ing those words later
+ */
+sparseRIV fileToL2Clean(FILE *data);
+
+/*filetoL2direct is an experiment in simplifying the process.  it's slow */
+sparseRIV fileToL2direct(FILE *data);
+
+/*cosine determines the "similarity" between two RIVs. */
+float cosCompare(denseRIV baseRIV, sparseRIV comparator);
+
+/*currently unused */
+sparseRIV wordtoL2(char* word);
+
+/* converts an implicit RIV (a set of unvalued locations) into a formal 
+ * sparse RIV.  this chooses the best method to perform the consolidation
+ * and launches that function */
+sparseRIV consolidateI2S(int *implicit, size_t valueCount);
+
+/* like fileToL2 but takes a block of text */
+sparseRIV text2L2(char *text);
+/* text2L2 builds a sparse RIV from a block of text: the sum of the base
+ * RIVs of each whitespace delimited word.  frequency of the result is the
+ * number of words read and boolean is set to 1
+ */
+sparseRIV text2L2(char *text){
+	unsigned int blockSize;
+	char word[100] = {0};
+
+	/* locations (implicit RIV) are temp stored in temp block, and moved 
+	 * to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	int locationCount = 0;
+	int displacement = 0;
+
+	/* sscanf returns EOF (non-zero) once the text is exhausted, so the
+	 * loop must test for exactly 1; otherwise it keeps walking past the
+	 * end of the string with a stale displacement */
+	while(sscanf(text, "%99s%n", word, &displacement) == 1){
+		/* advance by what was consumed, never past the terminator */
+		text += displacement;
+
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			int* grown = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			if(!grown){
+				/* out of memory: keep what has been gathered so far */
+				break;
+			}
+			RIVKey.h_tempBlock = grown;
+			locations = grown;
+			/* record the size that was really allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations((unsigned char*)word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+	sparseRIV output = consolidateI2S(locations, locationCount);
+
+	/* frequency records the number of words in this text, until frequency
+	 * is needed to hold some more useful data point */
+	output.frequency = locationCount/NONZEROS;
+	output.boolean = 1;
+	return output;
+}
+	
+/* fileToL2 reads every whitespace delimited word of data and returns the
+ * sum of their base RIVs as a sparse vector.  frequency of the result is
+ * the number of words read and boolean is set to 1
+ */
+sparseRIV fileToL2(FILE *data){
+	unsigned int blockSize;
+	unsigned char word[100] = {0};
+
+	/* locations (implicit RIV) are temp stored in temp block, and moved 
+	 * to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	int locationCount = 0;
+
+	/* compare against 1: fscanf returns EOF (non-zero) at end of file, and
+	 * a last word ending exactly at end of file must not be dropped */
+	while(fscanf(data, "%99s", word) == 1){
+
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			int* grown = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			if(!grown){
+				/* out of memory: keep what has been gathered so far */
+				break;
+			}
+			RIVKey.h_tempBlock = grown;
+			locations = grown;
+			/* record the size that was really allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+
+	sparseRIV output = consolidateI2S(locations, locationCount);
+
+	/* frequency records the number of words in this file */
+	output.frequency = locationCount/NONZEROS;
+	output.boolean = 1;
+
+	return output;
+}
+
+/* fileToL2Clean works like fileToL2 but only words accepted by
+ * isWordClean contribute.  frequency of the result is the number of
+ * clean words read and boolean is set to 1
+ */
+sparseRIV fileToL2Clean(FILE *data){
+
+	unsigned char word[100] = {0};
+	int *locations = RIVKey.h_tempBlock;
+	unsigned int blockSize;
+
+	int locationCount = 0;
+
+	/* compare against 1: fscanf returns EOF (non-zero) at end of file, and
+	 * a last word ending exactly at end of file must not be dropped */
+	while(fscanf(data, "%99s", word) == 1){
+
+		/* if the word is not clean, skip it */
+		if(!isWordClean((char*)word)){
+			continue;
+		}
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			int* grown = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			if(!grown){
+				/* out of memory: keep what has been gathered so far */
+				break;
+			}
+			RIVKey.h_tempBlock = grown;
+			locations = grown;
+			/* record the size that was really allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+
+	sparseRIV output = consolidateI2S(locations, locationCount);
+
+	/* frequency records the number of words in this file */
+	output.frequency = locationCount/NONZEROS;
+	output.boolean = 1;
+	return output;
+}
+
+/* consolidateI2S turns an implicit RIV (a list of unvalued locations) into
+ * a sparse RIV, dispatching on the number of locations: below
+ * RIVKey.I2SThreshold the direct method is used, otherwise the indirect one
+ */
+sparseRIV consolidateI2S(int *implicit, size_t valueCount){
+	/* direct is faster on small inputs but scales badly; indirect is
+	 * tuned for large inputs */
+	return (valueCount<RIVKey.I2SThreshold)
+		? consolidateI2SDirect(implicit, valueCount)
+		: consolidateI2SIndirect(implicit, valueCount);
+}
+/* aggregateWord2D adds the base RIV of word straight into a dense vector:
+ * rand() is seeded from the word, then NONZEROS pairs of positions are
+ * drawn, the first of each pair incremented and the second decremented
+ */
+void aggregateWord2D(denseRIV destination, char* word){
+
+	srand(wordtoSeed((unsigned char*)word));
+	int remaining = NONZEROS;
+	while(remaining-- > 0){
+		/* draw order matters: +1 position first, -1 position second */
+		int plusAt = rand()%RIVSIZE;
+		destination.values[plusAt] +=1;
+		int minusAt = rand()%RIVSIZE;
+		destination.values[minusAt] -= 1;
+	}
+}
+
+/* cosCompare returns the cosine similarity of a dense and a sparse RIV:
+ * their dot product divided by the product of their stored magnitudes.
+ * every term of the dot product is also printed to stdout
+ */
+float cosCompare(denseRIV baseRIV, sparseRIV comparator){
+
+	/* 64 bit accumulator: a sum of int products overflows an int quickly */
+	long long int dot = 0;
+	int n = comparator.count;
+	while(n){
+		n--;
+		/* we calculate the dot-product to derive the cosine 
+		 * comparing sparse to dense by index; widen before multiplying */
+		dot += (long long int)comparator.values[n] * baseRIV.values[comparator.locations[n]];
+
+		/* NOTE(review): looks like leftover debug output - confirm before removing */
+		printf("%d, %d, %d\n",baseRIV.values[comparator.locations[n]],comparator.values[n] , n);
+
+	}
+	/*dot divided by product of magnitudes */
+	float cosine = dot/(baseRIV.magnitude*comparator.magnitude);
+
+	return cosine;
+}
+
+/* getMagnitudeSparse returns the euclidean length of a sparse vector.
+ * input is passed by value, so the caller's magnitude field is not
+ * updated; the caller must store the returned value itself
+ */
+float getMagnitudeSparse(sparseRIV input){
+	unsigned long long int temp = 0;
+	int *values = input.values;
+	int *values_stop = values+input.count;
+	while(values<values_stop){	
+		/* square in 64 bits: int*int overflows for values beyond 46340 */
+		long long int value = *values;
+		temp += (unsigned long long int)(value*value);
+		values++;
+
+	}
+
+	return sqrt(temp);
+}
+
+/* lexPull returns the vector stored under word: from the cache when
+ * CACHESIZE > 0 and the word occupies its slot, otherwise from the file
+ * "lexicon/<word>".  when that file cannot be opened a fresh vector from
+ * denseAllocate() is returned.  the result is named after word unless it
+ * came from the cache
+ */
+denseRIV lexPull(char* word){
+	#if CACHESIZE > 0
+
+	/* if there is a cache, first check if the word is cached */
+	srand(wordtoSeed((unsigned char*)word));
+	int hash = rand()%CACHESIZE;
+	if(!strcmp(word, RIVKey.RIVCache[hash].name)){
+
+		/* if word is cached, pull from cache and exit */
+		return RIVKey.RIVCache[hash];
+	}
+	#endif /* CACHESIZE > 0 */
+
+	/* if not, attempt to pull the word data from lexicon file */
+	denseRIV output;
+
+	char pathString[200];
+	FILE *lexWord = NULL;
+
+	/* only open the file when the whole path fits; a truncated path
+	 * would name a different word's file */
+	int pathLength = snprintf(pathString, sizeof(pathString), "lexicon/%s", word);
+	if(pathLength >= 0 && (size_t)pathLength < sizeof(pathString)){
+		lexWord = fopen(pathString, "rb");
+	}
+
+	/* if this lexicon file already exists */
+	if(lexWord){
+		/* pull data from file */
+		output = fLexPull(lexWord);
+		fclose(lexWord);
+	}else{
+		/*if file does not exist, return a 0 vector (word is new to the lexicon) */ //#TODO enable NO-NEW features to protect mature lexicons? 
+		output = denseAllocate();
+	}
+
+	strcpy(output.name, word);
+	return output;
+}
+/* lexPush stores a denseRIV permanently.
+ * Without a cache (CACHESIZE == 0) the RIV is written straight to its file.
+ * With a cache, the RIV either takes an empty cache slot, evicts a less
+ * frequent occupant (which is then written to file), or is itself written
+ * to file.
+ * returns 0 when nothing had to be written, otherwise the status reported
+ * by fLexPush for the write that was performed
+ */
+int lexPush(denseRIV RIVout){
+	#if CACHESIZE == 0
+	/* if there is no cache, simply push to file and report its status */
+	return fLexPush(RIVout);
+	#else /* CACHESIZE != 0 */
+
+	/* if our RIV was cached, there are two options (hopefully)
+	 * either the RIV is still cached, and the data has been updated
+	 * to the cache or the RIV was pushed out from under it,
+	 * in which case it has already been pushed! move on*/
+
+	if(RIVout.cached){
+		return 0;
+	}
+
+	srand(wordtoSeed((unsigned char*)RIVout.name));
+	int hash = rand()%CACHESIZE;
+
+	if(!RIVKey.RIVCache[hash].cached){
+		/* if there is no word in this cache slot, push to cache instead of file */
+		RIVKey.RIVCache[hash] = RIVout;
+		RIVKey.RIVCache[hash].cached = 1;
+		return 0;
+
+	/*if the current RIV is more frequent than the RIV holding its slot */
+	}else if(*(RIVout.frequency) > *(RIVKey.RIVCache[hash].frequency) ){
+		/* push the current cache entry to a file */
+		int diag = fLexPush(RIVKey.RIVCache[hash]);
+		/* push the current RIV to cache */
+
+		RIVKey.RIVCache[hash] = RIVout;
+		RIVKey.RIVCache[hash].cached = 1;
+		return diag;
+	}else{
+		/* push current RIV to file, reporting the write status like the
+		 * eviction branch does instead of discarding it */
+		return fLexPush(RIVout);
+	}
+	#endif /* CACHESIZE == 0 */
+
+}
+/* fileToL2direct reads whitespace-delimited words from data and sums their
+ * base RIVs directly in dense form, using the temp block as the accumulator,
+ * then consolidates the result into a sparseRIV.
+ * the frequency of the returned RIV is the number of words read
+ */
+sparseRIV fileToL2direct(FILE *data){
+	unsigned char word[100] = {0};
+	denseRIV denseTemp;
+	// a temporary dense RIV is stored in the tempBlock
+	// NOTE(review): assumes the tempBlock holds at least RIVSIZE ints - confirm
+	denseTemp.values = RIVKey.h_tempBlock;
+	memset(RIVKey.h_tempBlock, 0, RIVSIZE*sizeof(int));
+	int count = 0;
+
+	// fscanf returns 1 for each word read and EOF (non-zero) at end of input;
+	// testing for exactly 1 keeps a final word that has no trailing whitespace
+	// and stops the failed last read from being counted
+	while(fscanf(data, "%99s", word) == 1){
+		count++;
+
+		// add word's L1 RIV to the accumulating dense RIV
+		aggregateWord2D(denseTemp, (char*)word);
+	}
+	sparseRIV output = consolidateD2S(denseTemp.values);
+
+	// frequency records the number of words in this file
+	output.frequency = count;
+	output.boolean = 1;
+	return output;
+}
+
--- a/RIVtoolsMorphic.h
+++ b/RIVtoolsMorphic.h
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "RIVLowerMorphic.h"
+#include "RIVaccessories.h"
+
+/* lexPush writes a denseRIV to a file for permanent storage */
+int lexPush(denseRIV RIVout);
+/* lexPull reads an existing lexicon entry (under directory "lexicon")
+ * and creates a denseRIV with those attributes.
+ * if the file does not exist, it creates a 0 vector with the name of word
+ */
+denseRIV lexPull(char* word);
+
+/* fileToL2 takes an input file, reads words (delimiting on " " and "\n") 
+ * and returns a sparse RIV which is the vector sum of the base RIVs of each 
+ * word contained
+ */
+sparseRIV fileToL2(FILE *input);
+/* fileToL2Clean operates the same as fileToL2 but keeps only words 
+ * containing lowercase letters and the '_' symbol
+ * this is important if you will be lexPush-ing those words later
+ */
+sparseRIV fileToL2Clean(FILE *data);
+
+sparseRIV fileToL2direct(FILE *data);
+/*cosine determines the "similarity" between two RIVs. */
+float cosCompare(denseRIV baseRIV, sparseRIV comparator);
+
+sparseRIV wordtoL2(char* word);
+
+sparseRIV consolidateI2S(int *implicit, size_t valueCount);
+sparseRIV text2L2(char *text);
+/* text2L2 reads whitespace-delimited words from a NUL-terminated string and
+ * returns a sparse RIV which is the vector sum of the base RIVs of each word.
+ * the frequency of the returned RIV is the number of words read
+ */
+sparseRIV text2L2(char *text){
+	unsigned int blockSize;
+	char word[100] = {0};
+
+	/* locations (implicit RIV) are temp stored in temp block, and moved
+	 * to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	unsigned int locationCount = 0;
+	int displacement = 0;
+
+	/* sscanf returns 1 per word and EOF (non-zero) once only whitespace or
+	 * the terminator remains, so the result is compared against 1 */
+	while(sscanf(text, "%99s%n", word, &displacement) == 1){
+		/* advance by exactly what was consumed; %s skips leading whitespace
+		 * on the next pass, and this never steps past the terminator */
+		text += displacement;
+
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			locations = RIVKey.h_tempBlock;
+			/* record the size that was actually allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations((unsigned char*)word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+	sparseRIV output = consolidateI2S(locations, locationCount);
+
+	/* frequency records the number of words in this text
+	 * NOTE(review): fileToL2 allocates output.frequency itself before writing
+	 * through it; confirm that consolidateI2S provides the storage here */
+	*(output.frequency) = locationCount/NONZEROS;
+	output.boolean = 1;
+	return output;
+}
+	
+/* fileToL2 reads whitespace-delimited words from data and returns a sparse
+ * RIV which is the vector sum of the base RIVs of each word.
+ * the frequency of the returned RIV is the number of words read
+ */
+sparseRIV fileToL2(FILE *data){
+	unsigned int blockSize;
+	unsigned char word[100] = {0};
+
+	/* locations (implicit RIV) are temp stored in temp block, and moved
+	 * to permanent home in consolidation */
+	int *locations = RIVKey.h_tempBlock;
+	int locationCount = 0;
+
+	/* fscanf returns 1 per word and EOF (non-zero) at end of input; testing
+	 * for exactly 1 keeps a final word that has no trailing whitespace */
+	while(fscanf(data, "%99s", word) == 1){
+
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			locations = RIVKey.h_tempBlock;
+			/* record the size that was actually allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+
+	sparseRIV output = consolidateI2S(locations, locationCount);
+	output.frequency = malloc(1*sizeof(int));
+	/* frequency records the number of words in this file */
+	*(output.frequency) = locationCount/NONZEROS;
+	output.boolean = 1;
+	return output;
+}
+
+/* fileToL2Clean operates the same as fileToL2 but only aggregates the words
+ * that isWordClean accepts; every other word is skipped.
+ * the frequency of the returned RIV is the number of words aggregated
+ */
+sparseRIV fileToL2Clean(FILE *data){
+
+	unsigned char word[100] = {0};
+	int *locations = RIVKey.h_tempBlock;
+	unsigned int blockSize;
+
+	int locationCount = 0;
+
+	/* fscanf returns 1 per word and EOF (non-zero) at end of input; testing
+	 * for exactly 1 keeps a final word that has no trailing whitespace */
+	while(fscanf(data, "%99s", word) == 1){
+
+		/* if the word is not clean, skip it */
+		if(!isWordClean((char*)word)){
+			continue;
+		}
+		blockSize = locationCount+NONZEROS;
+		/* if this word would overflow the locations block, grow it */
+		if(blockSize>RIVKey.tempSize){
+			RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
+			locations = RIVKey.h_tempBlock;
+			/* record the size that was actually allocated */
+			RIVKey.tempSize = blockSize;
+		}
+
+		/* add word's L1 RIV to the accumulating implicit RIV */
+		makeSparseLocations(word, locations, locationCount);
+		locationCount+= NONZEROS;
+
+	}
+
+	sparseRIV output = consolidateI2S(locations, locationCount);
+
+	/* frequency records the number of words in this file
+	 * NOTE(review): fileToL2 allocates output.frequency itself before writing
+	 * through it; confirm that consolidateI2S provides the storage here */
+	*(output.frequency) = locationCount/NONZEROS;
+	output.boolean = 1;
+	return output;
+}
+
+/* consolidateI2S turns an implicit RIV (a raw list of locations) into a
+ * sparseRIV, choosing the consolidation routine from the input size */
+sparseRIV consolidateI2S(int *implicit, size_t valueCount){
+	/* up to and including the threshold, the direct routine is used */
+	if(!(valueCount>RIVKey.I2SThreshold)){
+		return consolidateI2SDirect(implicit, valueCount);
+	}
+	/* anything larger goes through the indirect routine */
+	return consolidateI2SIndirect(implicit, valueCount);
+}
+/* aggregateWord2D adds the base RIV of a word straight into a dense vector.
+ * The generator is seeded from the word, then NONZEROS pairs of indexes are
+ * drawn: the first index of each pair is incremented, the second decremented */
+void aggregateWord2D(denseRIV destination, char* word){
+	int remaining = NONZEROS;
+
+	srand(wordtoSeed((unsigned char*)word));
+	while(remaining > 0){
+		/* the order of the two rand() draws determines the resulting vector */
+		destination.values[rand()%RIVSIZE] += 1;
+		destination.values[rand()%RIVSIZE] -= 1;
+		remaining--;
+	}
+}
+
+/* cosCompare returns the cosine similarity between a dense RIV and a sparse
+ * RIV: their dot-product divided by the product of their magnitudes.
+ * both magnitudes are read from the arguments, not calculated here */
+float cosCompare(denseRIV baseRIV, sparseRIV comparator){
+	int dot = 0;
+
+	/* only the non-zero entries of the comparator contribute; each one is
+	 * paired with the dense value stored at the same location */
+	for(int entry=0; entry<comparator.count; entry++){
+		dot += comparator.values[entry] * baseRIV.values[comparator.locations[entry]];
+	}
+
+	float cosine = dot/(baseRIV.magnitude*comparator.magnitude);
+	return cosine;
+}
+
+/* getMagnitudeSparse returns the euclidean length of a RIV: the square root
+ * of the sum of its squared values.
+ * if the SPARSE flag is set, input.count values are read, otherwise RIVSIZE.
+ * input is passed by value, so the result is only returned; the caller is
+ * responsible for storing it in the RIV's magnitude field */
+float getMagnitudeSparse(RIV input){
+	size_t count;
+	if(input.flags & SPARSE){
+		count = input.count;
+	}else{
+		count = RIVSIZE;
+	}
+
+	unsigned long long int temp = 0;
+	int *values = input.values;
+	int *values_stop = values+count;
+	while(values<values_stop){
+		/* widen before squaring: int*int overflows for values above 46340 */
+		long long int value = *values;
+		temp += (unsigned long long int)(value*value);
+		values++;
+	}
+	float magnitude = sqrt(temp);
+	return magnitude;
+}
+
+/* lexPull returns the denseRIV stored for word.
+ * With a cache compiled in (CACHESIZE > 0) the cache slot selected by the
+ * word's seed is checked first. Otherwise the entry is read from the file
+ * "lexicon/<word>"; if that file cannot be opened a freshly allocated vector
+ * is returned.
+ * The returned RIV carries word as its name.
+ */
+denseRIV lexPull(char* word){
+	#if CACHESIZE > 0
+
+	/* if there is a cache, first check if the word is cached */
+	srand(wordtoSeed((unsigned char*)word));
+	int hash = rand()%RIVKey.cacheSize;
+	if(!strcmp(word, RIVKey.RIVCache[hash].name)){
+
+		/* if word is cached, pull from cache and exit */
+		return RIVKey.RIVCache[hash];
+	}
+	#endif /* CACHESIZE > 0 */
+	denseRIV output;
+	char pathString[200];
+	FILE *lexWord = NULL;
+
+	/* bounded write: a word too long for pathString is treated as absent
+	 * instead of overflowing the buffer or opening a truncated path */
+	int pathLength = snprintf(pathString, sizeof(pathString), "lexicon/%s", word);
+	if(pathLength > 0 && (size_t)pathLength < sizeof(pathString)){
+		lexWord = fopen(pathString, "rb");
+	}
+
+	/* if this lexicon file already exists */
+	if(lexWord){
+		/* pull data from file */
+		output = fLexPull(lexWord);
+		fclose(lexWord);
+	}else{
+		/*if file does not exist, return a 0 vector */
+		output = denseAllocate();
+	}
+
+	/* NOTE(review): the capacity of output.name is not visible here; confirm
+	 * that callers never pass a word longer than it can hold */
+	strcpy(output.name, word);
+	return output;
+}
+/* lexPush stores a denseRIV permanently.
+ * Without a cache (CACHESIZE == 0) the RIV is written straight to its file.
+ * With a cache, the RIV either takes an empty cache slot, evicts a less
+ * frequent occupant (which is then written to file), or is itself written
+ * to file.
+ * returns 0 when nothing had to be written, otherwise the status reported
+ * by fLexPush for the write that was performed
+ */
+int lexPush(denseRIV RIVout){
+	#if CACHESIZE == 0
+		/* no cache: push to file and report its status */
+		return fLexPush(RIVout);
+	#else /* CACHESIZE != 0 */
+
+		/* if our RIV was cached, there are two options (hopefully)
+		 * either the RIV is still cached, and the data has been updated to the cache
+		 * or the RIV was pushed out from under it, in which case it has already been pushed*/
+
+		if(RIVout.cached){
+			return 0;
+		}
+
+		srand(wordtoSeed((unsigned char*)RIVout.name));
+		int hash = rand()%RIVKey.cacheSize;
+
+		if(!RIVKey.RIVCache[hash].cached){
+			/* the slot is empty: keep the RIV in the cache instead of a file */
+			RIVKey.RIVCache[hash] = RIVout;
+			RIVKey.RIVCache[hash].cached = 1;
+			return 0;
+
+		/*if the current RIV is more frequent than the RIV holding its slot */
+		}else if(*(RIVout.frequency) > *(RIVKey.RIVCache[hash].frequency) ){
+			/* push the current cache entry to a file */
+			int diag = fLexPush(RIVKey.RIVCache[hash]);
+			/* replace the cache entry with the current RIV */
+
+			RIVKey.RIVCache[hash] = RIVout;
+			RIVKey.RIVCache[hash].cached = 1;
+			return diag;
+		}else{
+			/* push current RIV to file, reporting the write status like the
+			 * eviction branch does instead of discarding it */
+			return fLexPush(RIVout);
+		}
+	#endif /* CACHESIZE == 0 */
+}
+
+/* fileToL2direct reads whitespace-delimited words from data and sums their
+ * base RIVs directly in dense form, using the temp block as the accumulator,
+ * then consolidates the result into a sparseRIV.
+ * the frequency of the returned RIV is the number of words read
+ */
+sparseRIV fileToL2direct(FILE *data){
+	unsigned char word[100] = {0};
+	denseRIV denseTemp;
+	// a temporary dense RIV is stored in the tempBlock
+	// NOTE(review): assumes the tempBlock holds at least RIVSIZE ints - confirm
+	denseTemp.values = RIVKey.h_tempBlock;
+	memset(RIVKey.h_tempBlock, 0, RIVSIZE*sizeof(int));
+	int count = 0;
+
+	// fscanf returns 1 for each word read and EOF (non-zero) at end of input;
+	// testing for exactly 1 keeps a final word that has no trailing whitespace
+	// and stops the failed last read from being counted
+	while(fscanf(data, "%99s", word) == 1){
+		count++;
+
+		// add word's L1 RIV to the accumulating dense RIV
+		aggregateWord2D(denseTemp, (char*)word);
+	}
+	sparseRIV output = consolidateD2S(denseTemp.values);
+
+	// frequency records the number of words in this file
+	// NOTE(review): frequency is written through a pointer; confirm that
+	// consolidateD2S provides the storage it points to
+	*(output.frequency) = count;
+	output.boolean = 1;
+	return output;
+}
+
--- a/RIVunused.h
+++ b/RIVunused.h
+int* mapS2D(int* destination, sparseRIV input); //#TODO fix int*/denseRIV confusion
+int* addI2D(int* destination, int* locations, size_t seedCount);
+/* cosine determines the "similarity" between two RIVs.  */
+float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, size_t multiplierCount);
+/* magnitudes will be used later in cosine comparison */
+void getMagnitudes(sparseRIV *inputs, size_t RIVCount);
+unsigned char *sscanAdvance(unsigned char **string, unsigned char *word);//unused except in text2l2
+sparseRIV text2L2(unsigned char *text);//unused
+float* cosineCompareUnbound(sparseRIV baseRIV, sparseRIV *multipliers, size_t multiplierCount);
+/*lexPush writes a denseRIV to a file of the same name, under the directory "lexicon"
+ * it is up to the programmer to ensure that the name of the RIV is a valid filename
+ * although it will of course attempt to create the file if it does not exist
+ */
+/* mapS2D overwrites a dense RIV with the contents of a sparse RIV.
+ * destination: dense RIV whose values array is zeroed and then filled
+ * input:       sparse RIV (parallel locations/values arrays, count entries)
+ * returns the values array of destination
+ * NOTE(review): the prototype earlier in this file still declares the first
+ * parameter as int*; the two need to be reconciled (see the #TODO)
+ */
+int* mapS2D(denseRIV destination, sparseRIV input){// #TODO fix destination parameter vs calloc of destination
+	// make sure our destination is a 0 vector
+	memset(destination.values, 0, RIVKey.RIVsize*sizeof(int));
+
+	int *locations_slider = input.locations;
+	int *values_slider = input.values;
+	int *locations_stop = locations_slider+input.count;
+
+	// apply values at an index based on locations
+	while(locations_slider<locations_stop){
+		// index the values array; the struct itself cannot be subscripted
+		destination.values[*locations_slider] = *values_slider;
+		locations_slider++;
+		values_slider++;
+	}
+	strcpy(destination.name, input.name);
+	*(destination.frequency) = input.frequency;
+	// destination is a by-value copy, so this assignment is not seen by the caller
+	destination.magnitude = input.magnitude;
+
+	// the declared return type is int*, so hand back the dense values
+	return destination.values;
+}
+/* addI2D adds an implicit RIV to a dense vector: locations are consumed in
+ * pairs, the first of each pair incrementing the addressed slot and the
+ * second decrementing it. returns destination */
+int* addI2D(int* destination, int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
+	for(size_t pair=0; pair<valueCount; pair+=2){
+		destination[locations[pair]] += 1;
+		destination[locations[pair+1]] -= 1;
+	}
+
+	return destination;
+}
+/* cosineCompare compares baseRIV against each of multiplierCount sparse RIVs
+ * and returns a calloc'ed array of multiplierCount floats; the caller owns it.
+ * Only multipliers that pass the filter below produce a result, and results
+ * are written consecutively, so result k belongs to the k-th multiplier that
+ * was compared, not to multipliers[k]; unused trailing slots stay 0.
+ * The temp block is overwritten with the dense form of baseRIV.
+ * NOTE(review): COSINEACTION is defined elsewhere and is expanded inside the
+ * loop, so it may refer to the local names used here - keep them stable
+ */
+float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, size_t multiplierCount){
+	float *results = calloc(multiplierCount, sizeof(float));
+	float* results_slider = results;
+	/* the dense copy of baseRIV lives in the shared temp block */
+	int *baseDenseRIV = RIVKey.h_tempBlock;
+	memset(RIVKey.h_tempBlock, 0, RIVKey.RIVsize*sizeof(int));
+	addS2D(baseDenseRIV, baseRIV);
+	float cosine;
+	sparseRIV *multipliersStop = multipliers+multiplierCount;
+	
+	/* if two vectors are too different in size, we can ignore the risk of similarity */
+	float minsize = baseRIV.magnitude * .85;
+	float maxsize = baseRIV.magnitude * 1.15;
+	int dot = 0;
+	
+	int *values;
+	int *locations;
+	int *locations_Stop;
+	/* check the baseRIV against each multiplier */
+	while(multipliers<multipliersStop){
+		/* skip a pair if the multiplier has already been culled, or if
+		 * the size difference is too great */
+		if(((*multipliers).boolean) 
+		&& (((*multipliers).magnitude < maxsize) 
+		&& ((*multipliers).magnitude > minsize))){
+			
+			dot = 0;
+			values = (*multipliers).values;
+			locations = (*multipliers).locations;
+			locations_Stop = locations+(*multipliers).count;
+			
+			while(locations<locations_Stop){
+				/* we calculate the dot-product to derive the cosine */
+				dot += (*values)*(*(baseDenseRIV+(*locations)));
+				locations++;
+				values++;
+			}
+			/* magnitudes had better already be calculated at this point*/
+			cosine = dot/((baseRIV.magnitude)*((*multipliers).magnitude));
+			*results_slider = cosine;
+			results_slider++;
+			/* perform the action defined by the COSINEACTION macro */
+			COSINEACTION;
+		}
+		multipliers++;
+		
+	}
+	return results;
+}
+/* cosineCompareUnbound compares baseRIV against every one of multiplierCount
+ * sparse RIVs, without the size filter that cosineCompare applies, and
+ * returns a calloc'ed array holding one cosine per multiplier; the caller
+ * owns it. The temp block is overwritten with the dense form of baseRIV.
+ * NOTE(review): COSINEACTION is defined elsewhere and is expanded inside the
+ * loop, so it may refer to the local names used here - keep them stable
+ */
+float* cosineCompareUnbound(sparseRIV baseRIV, sparseRIV *multipliers, size_t multiplierCount){
+	float *results = calloc(multiplierCount, sizeof(float));
+	float* results_slider = results;
+	/* the dense copy of baseRIV lives in the shared temp block */
+	int *baseDenseRIV = RIVKey.h_tempBlock;
+	memset(RIVKey.h_tempBlock, 0, RIVKey.RIVsize*sizeof(int));
+	addS2D(baseDenseRIV, baseRIV);
+	float cosine;
+	sparseRIV *multipliersStop = multipliers+multiplierCount;
+
+	int dot = 0;
+
+	int *values;
+	int *locations;
+	int *locations_Stop;
+	/* check the baseRIV against each multiplier */
+	while(multipliers<multipliersStop){
+
+		dot = 0;
+		values = (*multipliers).values;
+		locations = (*multipliers).locations;
+		locations_Stop = locations+(*multipliers).count;
+
+		while(locations<locations_Stop){
+			/* we calculate the dot-product to derive the cosine */
+			dot += (*values)*(*(baseDenseRIV+(*locations)));
+			locations++;
+			values++;
+		}
+		/* magnitudes had better already be calculated at this point*/
+		cosine = dot/((baseRIV.magnitude)*((*multipliers).magnitude));
+		*results_slider = cosine;
+		results_slider++;
+		/* perform the action defined by the COSINEACTION macro */
+		COSINEACTION;
+
+		/* advance inside the loop: with the increment after the closing
+		 * brace the loop never terminated and overran the results array */
+		multipliers++;
+	}
+
+	return results;
+}
+
+/* getMagnitudes calculates the euclidean length of each of RIVCount sparse
+ * RIVs and stores it in that RIV's magnitude field */
+void getMagnitudes(sparseRIV *inputs, size_t RIVCount){
+	for(size_t i=0; i<RIVCount; i++){
+		unsigned long long int temp = 0;
+		int *values = inputs[i].values;
+		int *values_stop = values+inputs[i].count;
+		while(values<values_stop){
+			/* widen before squaring: int*int overflows for values above 46340 */
+			long long int value = *values;
+			temp += (unsigned long long int)(value*value);
+			values++;
+		}
+		float magnitude = sqrt(temp);
+		inputs[i].magnitude = magnitude;
+	}
+}
+
+
--- a/graphdata.txt
+++ b/graphdata.txt
--- a/logfile.txt
+++ b/logfile.txt
--- a/output.txt
+++ b/output.txt
--- a/runscriptUb.sh
+++ b/runscriptUb.sh
+# clean runs ./RIVread once for each argument, in order, stopping at the
+# first empty argument or when the arguments run out
+clean(){
+	until [ -z "$1" ]; do
+		./RIVread "$1"
+		shift
+	done
+}
+
+clean ../bookCleaner/cleanbooks/*
--- a/saturation.c
+++ b/saturation.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <time.h>
+#include "RIVtoolsCPUlinux.h"
+void directoryToL2s(char *rootString);
+/* entry point: initialise the RIV tools, then report the saturation of
+ * every entry found under the "lexicon/" directory */
+int main(){
+	char lexiconRoot[] = "lexicon/";
+
+	RIVInit();
+	directoryToL2s(lexiconRoot);
+	return 0;
+}
+/* directoryToL2s walks the directory rootString (which must end in "/") and,
+ * for every regular entry, pulls its dense RIV, consolidates it to sparse
+ * form and prints the number of non-zero values as its saturation.
+ * Sub-directories are processed recursively; entries whose name starts with
+ * '.' are ignored. The walk stops at the first entry that cannot be opened.
+ */
+void directoryToL2s(char *rootString){
+	sparseRIV fileRIV;
+	char pathString[2000];
+	DIR *directory;
+	struct dirent *files = 0;
+
+	if(!(directory = opendir(rootString))){
+		printf("location not found, %s\n", rootString);
+		return;
+	}
+
+	while((files=readdir(directory))){
+		if(*(files->d_name) == '.') continue;
+
+		if(files->d_type == DT_DIR){
+			/* bounded path construction; skip entries whose path would not fit */
+			int written = snprintf(pathString, sizeof(pathString), "%s%s/", rootString, files->d_name);
+			if(written > 0 && (size_t)written < sizeof(pathString)){
+				directoryToL2s(pathString);
+			}
+			/* a directory is not a lexicon entry: do not fall through and
+			 * try to read it as a file */
+			continue;
+		}
+
+		int written = snprintf(pathString, sizeof(pathString), "%s%s", rootString, files->d_name);
+		if(written <= 0 || (size_t)written >= sizeof(pathString)){
+			continue;
+		}
+		FILE *input = fopen(pathString, "r");
+		if(!input){
+			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
+			/* release the directory handle before leaving */
+			closedir(directory);
+			return;
+		}else{
+			/* NOTE(review): pathString already starts with rootString; confirm
+			 * that lexPull expects a path here rather than a bare word */
+			denseRIV temp = lexPull(pathString);
+			fileRIV = consolidateD2S(temp.values);
+			/* NOTE(review): the capacity of fileRIV.name is not visible here;
+			 * confirm it can hold a full path */
+			strcpy(fileRIV.name, pathString);
+			float count = fileRIV.count;
+			printf("%s, saturation: %f\n", fileRIV.name, count);
+			fclose(input);
+			free(temp.values);
+			//free(fileRIV.locations);
+		}
+	}
+	closedir(directory);
+}
--- a/stemnet2.txt
+++ b/stemnet2.txt
--- a/test.py
+++ b/test.py
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+def fit(x):
+    return 1*(1067+94500000/x)
+
+
+x = 7
+range = 0.15
+while(1):
+    range = input("gimmerange");
+    data = open("graphdata.txt", "r");
+    frequencies = [];
+    mags = [];
+    fitline = [];
+    i = 0;
+    for line in data:
+        segments = line.split(",")
+        freq = int(segments[1])
+        mag = float(segments[2])
+        name = segments[4];
+        
+        core = fit(freq)
+        fitmax = core*(1+range);
+        fitmin = core*(1-range);
+        if(mag >fitmax or mag < fitmin):
+            continue
+        frequencies.append(freq)
+        mags.append(mag)
+	fitline.append(fit(freq));
+	print("{} {} {}".format(name, freq, mag))
+    
+        
+        i+=1
+
+    
+    #plt.scatter(frequencies, mags)
+    plt.plot(frequencies, fitline, 'r^', frequencies, mags, 'bs')
+    plt.show()
+    x+=1
+    
--- a/windows/RIVtoolsCPUwindows.h
+++ b/windows/RIVtoolsCPUwindows.h
+#include <stdio.h>
+#include <stdlib.h>
+#include <strsafe.h>
+
+#define SEEDMASK 25214903917
+/* RIVData groups the settings and scratch buffers shared by the functions in
+ * this file; the single instance is the global RIVKeyData */
+struct RIVData{
+	int RIVsize; /* length of a dense vector; set by setKeyData */
+	int nonZeros; /* seeds generated per word; setKeyData rounds it up to even */
+	long long int *masks; /* nonZeros masks derived from SEEDMASK in setKeyData */
+	int *h_tempBlock; /* scratch block of blockSize ints, allocated in setKeyData */
+	int *h_stagingBlock; /* not initialised in this file (assignment is commented out in setKeyData) */
+	int *h_staging_slider; /* not initialised in this file (assignment is commented out in setKeyData) */
+	int *h_staging_stop; /* not referenced by the code in this file */
+	int *h_displacements; /* not referenced by the code in this file */
+	int *d_OpenSlot; /* not referenced by the code in this file; presumably a device pointer - TODO confirm */
+	int *d_SlotEnd; /* not referenced by the code in this file; presumably a device pointer - TODO confirm */
+	float *d_magnitudes; /* not referenced by the code in this file; presumably a device pointer - TODO confirm */
+	int thing; /* counter: zeroed by setKeyData, incremented once per FileToL2 call */
+}RIVKeyData;
+/* sparseRIV stores only the non-zero entries of a vector, as two parallel
+ * arrays of count elements each */
+typedef struct{
+	char name[100]; /* label printed by cosineCompare when a match is reported */
+	int *values; /* the non-zero values; heap block allocated by consolidateD2S */
+	int *locations; /* dense index of each value; heap block allocated by consolidateD2S */
+	int count; /* number of entries in values and locations */
+	int frequency; /* not written by the code in this file */
+	float magnitude; /* euclidean length; filled in by getMagnitudes */
+	int boolean; /* set to 1 by FileToL2; cosineCompare clears it on a match and skips RIVs where it is 0 */
+}sparseRIV;
+sparseRIV FileToL2(FILE *data);
+void consolidateD2S(sparseRIV *destination, int *denseInput);
+void setKeyData(int RIVsize, int nonZeros, int blockSize);
+int* mapS2D(int * destination, sparseRIV input);
+int* makeSparseLocations(int *seeds, int seedCount);
+void makeSeeds(unsigned char* word, int **seeds, int *seedCount);
+float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, int multiplierCount, float threshold);
+void getMagnitudes(sparseRIV *inputs, int RIVCount);
+int *mapI2D(int *locations, int seedCount);
+sparseRIV text2L2(unsigned char *text);
+unsigned char *sscanAdvance(unsigned char **string, unsigned char *word);
+
+
+/* FileToL2 reads whitespace-delimited words from data, derives the seeds of
+ * every word into the temp block, maps them to locations and consolidates
+ * the summed dense vector into a sparseRIV.
+ * The returned RIV owns its values and locations arrays and has boolean set
+ * to 1; its remaining fields are zero.
+ * NOTE(review): makeSeeds appends to the temp block without a bounds check;
+ * confirm the block is sized for the largest input file
+ */
+sparseRIV FileToL2(FILE *data){
+
+	/* a stack buffer replaces the heap block that was never freed */
+	unsigned char word[2000] = {0};
+	int *seeds = RIVKeyData.h_tempBlock;
+
+	int seedCount = 0;
+
+	/* the field width keeps an over-long token from overflowing word, and
+	 * comparing against 1 stops on EOF without dropping a final word that
+	 * has no trailing whitespace */
+	while(fscanf(data, "%1999s", word) == 1){
+		makeSeeds(word, &seeds, &seedCount);
+	}
+
+	/* seeds are converted to locations in place, inside the temp block */
+	int *locations = makeSparseLocations(seeds, seedCount);
+	int *L2dense;
+	L2dense = mapI2D(locations, seedCount);
+
+	/* zero-initialise so the fields consolidateD2S does not set are defined */
+	sparseRIV output = {0};
+	consolidateD2S( &output, L2dense);
+	free(L2dense);
+
+	output.boolean = 1;
+	RIVKeyData.thing++;
+	return output;
+
+}
+
+
+/* cosineCompare compares baseRIV against each of multiplierCount sparse RIVs.
+ * Multipliers whose boolean is 0 are skipped. When a cosine reaches threshold
+ * the pair is printed and the multiplier's boolean is cleared.
+ * returns a heap array of multiplierCount cosines, aligned with multipliers
+ * (skipped entries read 0), or NULL if it cannot be allocated; the caller
+ * owns the array. The temp block is overwritten with the dense form of baseRIV.
+ */
+float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, int multiplierCount, float threshold){
+
+	int *baseDenseRIV = RIVKeyData.h_tempBlock;
+	mapS2D(baseDenseRIV, baseRIV);
+	/* calloc so that the slots of skipped multipliers hold 0 rather than
+	 * uninitialised memory */
+	float *outputs = (float*)calloc(multiplierCount, sizeof(float));
+	if(!outputs){
+		return NULL;
+	}
+	float *output_slider = outputs;
+	sparseRIV *multipliersStop = multipliers+multiplierCount;
+
+	while(multipliers<multipliersStop){
+
+		/* the magnitude-window filter (0.75x to 1.25x of the base magnitude)
+		 * is disabled; only culled multipliers are skipped */
+		if((*multipliers).boolean){
+			/* a wide accumulator keeps the dot-product from overflowing */
+			long long int dot = 0;
+			int *values = (*multipliers).values;
+			int *locations = (*multipliers).locations;
+			int *locations_Stop = locations+(*multipliers).count;
+
+			while(locations<locations_Stop){
+
+				dot += (long long int)(*values)*(*(baseDenseRIV+(*locations)));
+				locations++;
+				values++;
+			}
+			*output_slider= dot/((baseRIV.magnitude)*((*multipliers).magnitude));
+			if(*output_slider>=threshold){
+				printf("%s\t%s\n%f\n", (*multipliers).name, baseRIV.name, *output_slider);
+				(*multipliers).boolean = 0;
+				//RIVKeyData.thing ++;
+			}
+		}
+		multipliers++;
+		output_slider++;
+	}
+
+	return outputs;
+
+}
+
+/* getMagnitudes calculates the euclidean length of each of RIVCount sparse
+ * RIVs and stores it in that RIV's magnitude field */
+void getMagnitudes(sparseRIV *inputs, int RIVCount){
+	for(int i=0; i<RIVCount; i++){
+		/* the sum of squares does not fit an int for large vectors */
+		unsigned long long int temp = 0;
+		int *values = inputs[i].values;
+		int *values_stop = values+inputs[i].count;
+		while(values<values_stop){
+			/* widen before squaring: int*int overflows for values above 46340 */
+			long long int value = *values;
+			temp += (unsigned long long int)(value*value);
+			values++;
+
+		}
+		float magnitude = sqrt(temp);
+		inputs[i].magnitude = magnitude;
+	}
+
+}
+
+/* mapS2D expands a sparse RIV into the dense array destination, which must
+ * hold RIVKeyData.RIVsize ints: the array is zeroed and every sparse value
+ * is stored at its location. returns destination */
+int* mapS2D(int* destination, sparseRIV input){
+	memset(destination, 0, RIVKeyData.RIVsize*sizeof(int));
+
+	for(int entry=0; entry<input.count; entry++){
+		destination[input.locations[entry]] = input.values[entry];
+	}
+
+	//HANDLE_ERROR (cudaMemcpy (RIVKeyData.d_OpenSlot, destination, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyHostToDevice));
+	return destination;
+}
+
+/* mapI2D builds a dense vector of RIVKeyData.RIVsize ints from an implicit
+ * RIV: the locations alternately add +1 and -1, starting with +1.
+ * returns a calloc'ed array that the caller must free */
+int* mapI2D(int *locations, int valueCount){
+	int *destination = (int*)calloc(RIVKeyData.RIVsize,sizeof(int));
+
+	for(int entry=0; entry<valueCount; entry++){
+		/* even positions contribute +1, odd positions -1 */
+		if(entry%2 == 0){
+			destination[locations[entry]] += 1;
+		}else{
+			destination[locations[entry]] += -1;
+		}
+	}
+
+	return destination;
+}
+	
+	
+
+/* consolidateD2S converts a dense vector of RIVKeyData.RIVsize ints into
+ * sparse form: every non-zero value and its index are collected, in index
+ * order, into freshly allocated arrays that are then shrunk to fit.
+ * Sets the locations, values and count fields of destination only */
+void consolidateD2S(sparseRIV *destination, int *denseInput){
+	int found = 0;
+
+	/* worst case every slot is non-zero, so start at full size */
+	destination->locations = (int*) malloc(RIVKeyData.RIVsize*sizeof(int));
+	destination->values = (int*) malloc(RIVKeyData.RIVsize*sizeof(int));
+
+	for(int index=0; index<RIVKeyData.RIVsize; index++){
+		if(!denseInput[index]){
+			continue;
+		}
+		destination->locations[found] = index;
+		destination->values[found] = denseInput[index];
+		found++;
+	}
+	destination->count = found;
+
+	/* give back the unused tail of both arrays */
+	destination->locations = (int*) realloc(destination->locations, destination->count*sizeof(int));
+	destination->values = (int*) realloc(destination->values, destination->count*sizeof(int));
+
+}
+
+
+/* setKeyData initialises the global RIVKeyData: it records the vector size
+ * and the (even) number of non-zeros, builds the mask table and allocates
+ * the temp block of blockSize ints. An odd nonZeros is rounded up with a
+ * printed notice */
+void setKeyData(int RIVsize, int nonZeros, int blockSize){
+	RIVKeyData.RIVsize = RIVsize;
+
+	if(nonZeros%2 != 0){
+		printf("your nonZeros must be an even number");
+		nonZeros += 1;
+		printf(", changed to %d", nonZeros);
+	}
+	RIVKeyData.nonZeros = nonZeros;
+
+	/* one mask per non-zero, each shifted five more bits than the last
+	 * NOTE(review): the shift reaches 64 bits or more once nonZeros exceeds
+	 * 13, which the C standard leaves undefined */
+	RIVKeyData.masks = (long long int*)malloc(nonZeros*sizeof(long long int));
+	int slot = 0;
+	while(slot<nonZeros){
+		RIVKeyData.masks[slot] = SEEDMASK>>(5*slot);
+		slot++;
+	}
+
+	RIVKeyData.h_tempBlock = (int*)malloc(blockSize*sizeof(int));
+	//RIVKeyData.h_stagingBlock = (int*)malloc(blockSize*sizeof(int));
+	//RIVKeyData.h_staging_slider = RIVKeyData.h_stagingBlock;
+	RIVKeyData.thing = 0;
+
+}
+
+/* makeSeeds appends RIVKeyData.nonZeros seeds for word to the array *seeds,
+ * starting at index *seedCount, and advances *seedCount by that amount.
+ * The seeds derive from a base value that sums every character of the word
+ * shifted left by five bits per character position.
+ * The array is written without a bounds check and is never reallocated here */
+void makeSeeds(unsigned char* word,  int **seeds, int *seedCount){
+	int seedbase = 0;
+
+	for(int position=0; word[position]; position++){
+		seedbase += (word[position])<<(position*5);
+	}
+
+	int *slot = (*seeds)+*seedCount;
+	for(int k=0; k<RIVKeyData.nonZeros; k++){
+		slot[k] = (seedbase>>k)+(3*k);
+	}
+	*seedCount+=RIVKeyData.nonZeros;
+	return;
+}
+/* makeSparseLocations converts seedCount seeds into vector locations, written
+ * to the temp block: each seed is XORed with a mask (the masks are used in
+ * rotation), cleared of its sign bit and reduced modulo RIVKeyData.RIVsize.
+ * seeds may be the temp block itself; each seed is read before its slot is
+ * overwritten. returns the temp block */
+int* makeSparseLocations(int* seeds, int seedCount){
+	int *locations = RIVKeyData.h_tempBlock;
+	int maskIndex = 0;
+
+	for(int entry=0; entry<seedCount; entry++){
+		locations[entry] =(((seeds[entry])^(RIVKeyData.masks[maskIndex])) & 2147483647) %(RIVKeyData.RIVsize);
+		/* rotate through the mask table */
+		maskIndex++;
+		if(!(maskIndex<RIVKeyData.nonZeros)) maskIndex-=RIVKeyData.nonZeros;
+	}
+
+	return locations;
+}
+
+
+/* sscanAdvance copies the characters of *string up to the next space (or the
+ * terminator) into word, terminates word, and leaves *string just past the
+ * space, or on the terminator when the end was reached.
+ * word is written without a bounds check. returns word */
+unsigned char *sscanAdvance(unsigned char **string, unsigned char *word){
+	unsigned char *cursor = *string;
+	size_t length = 0;
+
+	for(; *cursor != 0; cursor++){
+		if(*cursor == ' '){
+			/* consume the delimiter and stop */
+			cursor++;
+			break;
+		}
+		word[length] = *cursor;
+		length++;
+	}
+	*string = cursor;
+	word[length] = 0;
+
+	return word;
+}
+/* text2L2 splits a NUL-terminated text on spaces, derives the seeds of every
+ * word, maps them to locations and consolidates the summed dense vector into
+ * a sparseRIV.
+ * The returned RIV owns its values and locations arrays and has boolean set
+ * to 1, as FileToL2 does; its remaining fields are zero.
+ * NOTE(review): makeSparseLocations writes one int per seed into the temp
+ * block; confirm the block is sized for the longest text
+ */
+sparseRIV text2L2(unsigned char *text){
+	/* no word can be longer than the whole text, so this buffer cannot be
+	 * overrun by sscanAdvance */
+	unsigned char *word = (unsigned char*)calloc(strlen((char*)text)+1, 1);
+	int *seeds = NULL;
+	unsigned char *text_slider = text;
+	sparseRIV output = {0};
+
+	int seedCount = 0;
+
+	while(word && *text_slider){
+		sscanAdvance(&text_slider, word);
+
+		/* makeSeeds appends nonZeros seeds per word, so the buffer has to
+		 * grow with every word instead of staying at its initial size */
+		int *grown = (int*)realloc(seeds, (seedCount+RIVKeyData.nonZeros)*sizeof(int));
+		if(!grown){
+			break;
+		}
+		seeds = grown;
+		makeSeeds(word, &seeds, &seedCount);
+	}
+
+	/* locations points into the global temp block and must not be freed */
+	int *locations = makeSparseLocations(seeds, seedCount);
+
+	int *L2dense;
+	L2dense = mapI2D(locations, seedCount);
+
+	consolidateD2S(&output, L2dense);
+	output.boolean = 1;
+
+	/* release everything this call allocated for itself */
+	free(L2dense);
+	free(seeds);
+	free(word);
+	return output;
+}
--- a/windows/RIVtoolsGPU.h
+++ b/windows/RIVtoolsGPU.h
+#include <stdio.h>
+#include <stdlib.h>
+#include <strsafe.h>
+
+#define SEEDMASK 25214903917
+#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
+
+/* HandleError does nothing for cudaSuccess; for any other status it prints
+ * the CUDA error string with the reporting file and line, then terminates
+ * the process. Used through the HANDLE_ERROR macro */
+static void HandleError(cudaError_t err, const char *file, int line){
+	if(err == cudaSuccess){
+		return;
+	}
+	printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line);
+	exit(EXIT_FAILURE);
+}
+
+	
+/* squirt replaces each of the first N elements of d_magnitudes with its
+ * square root; one thread per element, 1D launch, surplus threads idle */
+__global__ void squirt(float *d_magnitudes, int N){
+	int index = (blockIdx.x*blockDim.x + threadIdx.x);
+	if(index < N){
+		d_magnitudes[index] = sqrt(d_magnitudes[index]);
+	}
+}
+	
+/* generateLocations converts seeds to locations for one "team": each thread
+ * handles the seed at index nonZeros*thread + team, XORs it with mask, clears
+ * the sign bit and reduces it modulo RIVsize. Indexes at or beyond seedCount
+ * are left untouched */
+__global__ void generateLocations(int *d_seeds, long long int mask, int *d_locations, int RIVsize, int team, int seedCount, int nonZeros){
+	int slot = nonZeros*(blockIdx.x*blockDim.x + threadIdx.x)+team;
+	if(slot < seedCount){
+		d_locations[slot] = ((d_seeds[slot]^mask) & 2147483647) %(RIVsize);
+	}
+}
+/* D2S compacts a dense vector of d_DenseSize ints: every thread that finds a
+ * non-zero value claims the next output slot through an atomic counter and
+ * stores the value and its index there. Output order therefore depends on
+ * the order in which threads reach the atomicAdd; *d_NZCount must be zero
+ * before the launch and holds the number of non-zeros afterwards */
+__global__ void D2S( int* d_DenseRIV, int* d_SparseValues, int* d_SparseLocations, int *d_NZCount, int d_DenseSize){
+	int index = (blockIdx.x*blockDim.x + threadIdx.x);
+	if(index < d_DenseSize){
+		int value = d_DenseRIV[index];
+		if(value){
+			int sparseSlot = atomicAdd(d_NZCount, 1);
+			d_SparseValues[sparseSlot] = value;
+			d_SparseLocations[sparseSlot] = index;
+		}
+	}
+}
+	
+/* S2D scatters a sparse vector into a dense one: thread i atomically adds
+ * d_values[i] to the dense slot d_locations[i], for i below numberOfValues */
+__global__ void S2D(int *d_locations, int *d_values, int *d_OpenSlot, int numberOfValues){
+	int index = blockIdx.x*blockDim.x + threadIdx.x;
+	if(index < numberOfValues){
+		atomicAdd( &d_OpenSlot[d_locations[index]] , d_values[index]);
+	}
+}
+/* I2D scatters an implicit vector into a dense one: thread i atomically adds
+ * +1 (even i) or -1 (odd i) to the dense slot d_locations[i], for i below
+ * numberOfValues */
+__global__ void I2D(int *d_locations, int *d_OpenSlot, int numberOfValues){
+	int index = blockIdx.x*blockDim.x + threadIdx.x;
+	if(index < numberOfValues){
+		int delta = 1;
+		if(index%2){
+			delta = -1;
+		}
+		atomicAdd( &d_OpenSlot[d_locations[index]] , delta);
+	}
+}
+
+
+
+void consolidateD2SStaged(sparseRIV *destination, int *denseInput);
+void consolidateD2S_d(sparseRIV *destination, int *denseInput);
+void setKeyData_d(int RIVsize, int nonZeros, int blockSize);
+int* mapS2D_d(int * destination, sparseRIV input);
+float *getMagnitudes_d(sparseRIV *inputs, int RIVCount);
+int *mapI2D_d(int *locations, int seedCount);
+int* makeSparseLocations_d(int* seeds, int seedCount);
+
+
+
+
+
+/* getMagnitudes_d calculates the magnitude of each of RIVCount sparse RIVs:
+ * the sums of squares are accumulated on the host, the square roots are taken
+ * on the device by the squirt kernel, and the results are stored in each
+ * RIV's magnitude field.
+ * returns the pinned host array of magnitudes (allocated with cudaMallocHost;
+ * the caller releases it).
+ * NOTE(review): RIVKeyData.d_magnitudes is allocated on every call and not
+ * released here; confirm who owns it before calling this repeatedly
+ */
+float *getMagnitudes_d(sparseRIV *inputs, int RIVCount){
+	float *magnitudes;
+	HANDLE_ERROR (cudaMallocHost((float**)&magnitudes,RIVCount*sizeof(float)));
+	float *magnitudes_slider = magnitudes;
+	for(int i=0; i<RIVCount; i++){
+		/* the sum of squares does not fit an int for large vectors */
+		unsigned long long int temp = 0;
+		int *values = inputs[i].values;
+		int *values_stop = values+inputs[i].count;
+		while(values<values_stop){
+			/* widen before squaring: int*int overflows for values above 46340 */
+			long long int value = *values;
+			temp += (unsigned long long int)(value*value);
+			values++;
+		}
+		*magnitudes_slider = temp;
+		magnitudes_slider++;
+
+	}
+	HANDLE_ERROR (cudaMalloc((void**)&RIVKeyData.d_magnitudes, RIVCount*sizeof(float)));
+	HANDLE_ERROR (cudaMemcpy (RIVKeyData.d_magnitudes, magnitudes, RIVCount*sizeof(float), cudaMemcpyHostToDevice));
+
+	int blockSize;
+	int minGridSize = 0;
+	int gridSize;
+	HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, squirt));
+	/* ceil-div plus one spare block, so the grid is never empty */
+	gridSize = ((RIVCount + blockSize -1) / blockSize)+1;
+
+	squirt<<<gridSize,blockSize >>> (RIVKeyData.d_magnitudes, RIVCount);
+	/* a kernel launch returns no status of its own; fetch it explicitly */
+	HANDLE_ERROR (cudaGetLastError());
+
+	HANDLE_ERROR (cudaMemcpy (magnitudes, RIVKeyData.d_magnitudes, RIVCount*sizeof(float), cudaMemcpyDeviceToHost));
+	magnitudes_slider = magnitudes;
+	for(int i=0; i<RIVCount; i++){
+		inputs[i].magnitude =  *magnitudes_slider;
+		magnitudes_slider++;
+
+	}
+	return magnitudes;
+}
+/* mapS2D_d expands a sparse RIV into a dense vector on the device and copies
+ * the result into the host array destination (RIVKeyData.RIVsize ints).
+ * The device buffer d_OpenSlot is used as: RIVsize dense ints, followed by
+ * the input locations, followed by the input values.
+ * NOTE(review): assumes d_OpenSlot has room for RIVsize + 2*input.count ints
+ * - confirm against its allocation
+ * returns destination
+ */
+int *mapS2D_d(int* destination, sparseRIV input){
+
+	int *d_locations = RIVKeyData.d_OpenSlot+RIVKeyData.RIVsize;
+
+	int *d_values = d_locations+input.count;
+
+	HANDLE_ERROR (cudaMemset (RIVKeyData.d_OpenSlot, 0, RIVKeyData.RIVsize*sizeof(int)));
+	HANDLE_ERROR (cudaMemcpy (d_locations, input.locations, input.count*sizeof(int), cudaMemcpyHostToDevice));
+	HANDLE_ERROR (cudaMemcpy (d_values, input.values, input.count*sizeof(int), cudaMemcpyHostToDevice));
+
+	int blockSize;
+	int minGridSize = 0;
+	int gridSize;
+	HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, S2D));
+	/* ceil-div plus one spare block, so the grid is never empty */
+	gridSize = ((input.count + blockSize -1) / blockSize)+1;
+
+	S2D <<<gridSize,blockSize>>> (d_locations, d_values, RIVKeyData.d_OpenSlot, input.count);
+	/* a kernel launch returns no status of its own; fetch it explicitly */
+	HANDLE_ERROR (cudaGetLastError());
+
+	HANDLE_ERROR (cudaMemcpy (destination, RIVKeyData.d_OpenSlot, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
+	return destination;
+
+}
+/* mapI2D_d builds a dense vector on the device from an implicit RIV
+ * (valueCount locations alternately adding +1 and -1) and copies it into the
+ * host temp block.
+ * NOTE(review): assumes d_OpenSlot has room for RIVsize + valueCount ints and
+ * that the temp block holds at least RIVsize ints - confirm
+ * returns the temp block; the result is overwritten by the next user of it
+ */
+int* mapI2D_d(int *locations, int valueCount){
+	int *d_locations = RIVKeyData.d_OpenSlot+RIVKeyData.RIVsize;
+
+	HANDLE_ERROR (cudaMemset (RIVKeyData.d_OpenSlot, 0, RIVKeyData.RIVsize*sizeof(int)));
+	HANDLE_ERROR (cudaMemcpy (d_locations, locations, valueCount*sizeof(int), cudaMemcpyHostToDevice));
+	int blockSize;
+	int minGridSize = 0;
+	int gridSize;
+	HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, I2D));
+	/* ceil-div plus one spare block, so the grid is never empty */
+	gridSize = ((valueCount + blockSize -1) / blockSize)+1;
+
+	I2D <<<gridSize,blockSize>>> (d_locations, RIVKeyData.d_OpenSlot, valueCount);
+	/* a kernel launch returns no status of its own; fetch it explicitly */
+	HANDLE_ERROR (cudaGetLastError());
+
+	int* valuesOut = RIVKeyData.h_tempBlock;
+	HANDLE_ERROR (cudaMemcpy (valuesOut, RIVKeyData.d_OpenSlot, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
+	return valuesOut;
+
+}
+/* Compacts a dense host vector into sparse form and appends it to the host staging block.
+ *
+ * destination: receives pointers (locations, values) into the staging block
+ * denseInput:  host array of RIVKeyData.RIVsize ints
+ *
+ * The staging block is filled downward from its end. After this call the newly written
+ * record is laid out, from low to high address, as
+ *   [count][values x count][locations x count]
+ * and the int offset of the count word (relative to h_stagingBlock) is appended to the
+ * displacement list that RIVKeyData.h_displacements walks upward through.
+ *
+ * NOTE(review): destination->count is not assigned here; the count only lives in the
+ * staging block. Nothing checks that the downward-growing data and the upward-growing
+ * displacement list do not collide.
+ */
+void consolidateD2SStaged(sparseRIV *destination, int *denseInput){
+	int count = 0;
+	// h_tempBlock is used as scratch: locations in the first RIVsize ints, values after them
+	int *locations = RIVKeyData.h_tempBlock;
+	int *values =  RIVKeyData.h_tempBlock + RIVKeyData.RIVsize;
+
+	for(int i=0; i<RIVKeyData.RIVsize; i++){
+
+		if(denseInput[i]){
+
+			locations[count] = i;
+			values[count] = denseInput[i];
+			count++;
+		}
+	}
+	// copy back-to-front so the elements keep their order in the downward-growing block;
+	// strict '>' stops after element 0 (the previous '>=' also copied locations[-1],
+	// reading before h_tempBlock and prepending a junk entry)
+	int *locations_slider = locations+count;
+	while(locations_slider>locations){
+		RIVKeyData.h_staging_slider--;
+		locations_slider--;
+		*RIVKeyData.h_staging_slider = *locations_slider;
+	}
+	(*destination).locations = RIVKeyData.h_staging_slider;
+
+	// same back-to-front copy for the values, again stopping after element 0
+	int *values_slider = values+count;
+	while(values_slider>values){
+		RIVKeyData.h_staging_slider--;
+		values_slider--;
+		*RIVKeyData.h_staging_slider = *values_slider;
+	}
+	(*destination).values = RIVKeyData.h_staging_slider;
+
+	// the element count sits directly in front of the values
+	RIVKeyData.h_staging_slider--;
+	*RIVKeyData.h_staging_slider = count;
+
+	// remember where this record starts, as an int offset from the start of the block
+	*RIVKeyData.h_displacements = RIVKeyData.h_staging_slider -RIVKeyData.h_stagingBlock;
+	RIVKeyData.h_displacements++;
+
+}
+/* Converts a dense host vector to sparse form on the device and stores the result in the
+ * host staging block.
+ *
+ * destination: receives count, plus locations/values pointers into the staging block
+ * denseInput:  host array of RIVKeyData.RIVsize ints
+ *
+ * Device scratch layout inside RIVKeyData.d_OpenSlot:
+ *   [0, RIVsize)            uploaded dense input
+ *   [RIVsize, 2*RIVsize)    values written by D2S
+ *   [2*RIVsize, 3*RIVsize)  locations written by D2S
+ * The number of entries produced is read back from a device counter that is zeroed
+ * before the launch.
+ *
+ * NOTE(review): this function advances h_staging_slider upward, while setKeyData_d
+ * initialises the slider to the END of the staging block and consolidateD2SStaged moves
+ * it downward - confirm the caller repositions the slider before using this function.
+ * NOTE(review): the counter is allocated and freed on every call; a persistent
+ * allocation would avoid the repeated cudaMalloc/cudaFree.
+ */
+void consolidateD2S_d(sparseRIV *destination, int *denseInput){
+
+	int *d_valueCount;
+	HANDLE_ERROR (cudaMalloc((void**)&d_valueCount, sizeof(int)));
+	HANDLE_ERROR(cudaMemset(d_valueCount, 0, sizeof(int)));
+
+	HANDLE_ERROR (cudaMemcpy (RIVKeyData.d_OpenSlot, denseInput, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyHostToDevice));
+	int *d_outValues = RIVKeyData.d_OpenSlot+RIVKeyData.RIVsize;
+	int *d_outLocations = d_outValues+RIVKeyData.RIVsize;
+	int blockSize;
+	int minGridSize = 0;
+	int gridSize;
+	// the occupancy query returns a cudaError_t; a failure would leave blockSize uninitialised
+	HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, D2S));
+
+	// one block more than the ceil-div, kept as in the original launch
+	gridSize = ((RIVKeyData.RIVsize + blockSize -1) / blockSize)+1;
+	D2S <<<gridSize,blockSize>>> (RIVKeyData.d_OpenSlot, d_outValues, d_outLocations, d_valueCount, RIVKeyData.RIVsize);
+	// launch-configuration errors surface through cudaGetLastError,
+	// faults raised while the kernel runs surface at the synchronisation
+	HANDLE_ERROR (cudaGetLastError());
+	HANDLE_ERROR (cudaDeviceSynchronize());
+
+	HANDLE_ERROR (cudaMemcpy (&(*destination).count, d_valueCount, sizeof(int), cudaMemcpyDeviceToHost));
+
+	// carve two count-sized regions out of the staging block: locations first, then values
+	(*destination).locations = RIVKeyData.h_staging_slider;
+	RIVKeyData.h_staging_slider+=(*destination).count;
+
+	(*destination).values = RIVKeyData.h_staging_slider;
+	RIVKeyData.h_staging_slider+=(*destination).count;
+
+	HANDLE_ERROR (cudaMemcpy ((*destination).values, d_outValues, ((*destination).count)*sizeof(int), cudaMemcpyDeviceToHost));
+	HANDLE_ERROR (cudaMemcpy ((*destination).locations, d_outLocations, ((*destination).count)*sizeof(int), cudaMemcpyDeviceToHost));
+	HANDLE_ERROR (cudaFree(d_valueCount));
+
+}
+/* Initialises the global RIVKeyData state and allocates its host and device buffers.
+ *
+ * RIVsize:   stored as RIVKeyData.RIVsize
+ * nonZeros:  stored as RIVKeyData.nonZeros; an odd value is bumped to the next integer
+ *            after printing a notice
+ * blockSize: number of ints in each of the three buffers allocated here (pinned host
+ *            temp block, pinned host staging block, device slot)
+ *
+ * NOTE(review): the malloc result is not checked, and SEEDMASK is shifted by 5*i bits
+ * without a limit on i - confirm nonZeros stays small enough for the mask type.
+ */
+void setKeyData_d(int RIVsize, int nonZeros, int blockSize){
+	RIVKeyData.RIVsize = RIVsize;
+
+	const int isOdd = nonZeros%2;
+	if(isOdd){
+		printf("your nonZeros must be an even number");
+		++nonZeros;
+		printf(", changed to %d", nonZeros);
+	}
+	RIVKeyData.nonZeros = nonZeros;
+
+	// one mask per non-zero; each is SEEDMASK shifted right by a further 5 bits
+	long long int *maskTable = (long long int*)malloc(nonZeros*sizeof(long long int));
+	RIVKeyData.masks = maskTable;
+	for(int maskIndex = 0; maskIndex<nonZeros; ++maskIndex){
+		maskTable[maskIndex] = SEEDMASK>>(5*maskIndex);
+	}
+
+	// pinned host buffers: scratch space and the staging block
+	HANDLE_ERROR (cudaMallocHost((void**)&RIVKeyData.h_tempBlock, blockSize*sizeof(int)));
+	HANDLE_ERROR (cudaMallocHost((void**)&RIVKeyData.h_stagingBlock, blockSize*sizeof(int)));
+
+	// displacement cursor starts at the front of the staging block,
+	// the data cursor starts at its one-past-the-end position
+	RIVKeyData.h_displacements = RIVKeyData.h_stagingBlock;
+	RIVKeyData.h_staging_stop = RIVKeyData.h_stagingBlock + blockSize;
+	RIVKeyData.h_staging_slider = RIVKeyData.h_staging_stop;
+
+	// device scratch slot and its end marker
+	HANDLE_ERROR (cudaMalloc((void**)&RIVKeyData.d_OpenSlot, blockSize*sizeof(int)));
+	RIVKeyData.d_SlotEnd = RIVKeyData.d_OpenSlot+blockSize;
+
+	RIVKeyData.thing = 0;
+
+}
+/* Generates sparse locations from seeds on the device and copies them to the host.
+ *
+ * seeds:     host array of seedCount ints
+ * seedCount: number of seeds, also the number of locations copied back
+ * returns:   RIVKeyData.h_tempBlock holding seedCount ints; the buffer is shared scratch
+ *            space, so the result is only valid until h_tempBlock is reused
+ *
+ * Device scratch layout inside RIVKeyData.d_OpenSlot:
+ *   [0, seedCount)            generated locations
+ *   [seedCount, 2*seedCount)  uploaded seeds
+ * The kernel is launched once per entry of RIVKeyData.masks ("team"), each launch
+ * receiving its own mask and team index.
+ */
+int* makeSparseLocations_d(int* seeds, int seedCount){
+
+	int *d_locations = RIVKeyData.d_OpenSlot;
+	int *d_seeds = d_locations+seedCount;
+	HANDLE_ERROR (cudaMemcpy(d_seeds, seeds, seedCount*sizeof(int), cudaMemcpyHostToDevice));
+
+	int blockSize;
+	int minGridSize = 0;
+	int gridSize;
+
+	// the occupancy query returns a cudaError_t; a failure would leave blockSize uninitialised
+	HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, generateLocations));
+	// the grid is sized for one team's share of the seeds (divided by nonZeros)
+	gridSize = ((seedCount + blockSize -1) / (RIVKeyData.nonZeros*blockSize))+1;
+	long long int *mask = RIVKeyData.masks;
+	for(int team=0; team<RIVKeyData.nonZeros; team++){
+		// NOTE(review): the third launch argument is the dynamic shared-memory size in
+		// bytes, so 'team' here requests 'team' bytes of shared memory - confirm whether a
+		// stream (fourth argument) or nothing at all was intended
+		generateLocations <<<gridSize,blockSize,team>>> (d_seeds, *mask, d_locations, RIVKeyData.RIVsize, team, seedCount, RIVKeyData.nonZeros);
+		// kernel launches do not return a status; configuration errors only surface here
+		HANDLE_ERROR (cudaGetLastError());
+		mask++;
+	}
+
+	// faults raised while the kernels run surface at the synchronisation
+	HANDLE_ERROR (cudaDeviceSynchronize());
+	int *locations = RIVKeyData.h_tempBlock;
+	HANDLE_ERROR (cudaMemcpy(locations, d_locations, seedCount*sizeof(int), cudaMemcpyDeviceToHost));
+	return locations;
+}
+/* Runs the S2Ds kernel with one sparse RIV against a block of dense vectors held in
+ * RIVKeyData.d_OpenSlot, then copies the whole block back to the host.
+ *
+ * denseBlock: host buffer receiving RIVCount*RIVKeyData.RIVsize ints
+ * additive:   sparse RIV; additive.count entries are read from its locations and values
+ * RIVCount:   number of dense vectors in the device block
+ *
+ * Device scratch layout inside RIVKeyData.d_OpenSlot:
+ *   [0, RIVCount*RIVsize)   dense block (not uploaded here; must already be on the device)
+ *   directly behind it      uploaded locations, then uploaded values
+ *
+ * NOTE(review): the kernel is launched with additive.count blocks of a single thread.
+ * The original code also computed an occupancy-based block/grid size but never used it;
+ * that dead computation has been removed and the launch shape is unchanged, because the
+ * indexing inside S2Ds is not visible here.
+ */
+void addS2DsBlocked(int *denseBlock, sparseRIV additive, int RIVCount){
+
+	int *d_locations= RIVKeyData.d_OpenSlot+RIVCount*RIVKeyData.RIVsize;
+	int *d_values = d_locations+additive.count;
+	HANDLE_ERROR (cudaMemcpy (d_locations, additive.locations, additive.count*sizeof(int), cudaMemcpyHostToDevice));
+	HANDLE_ERROR (cudaMemcpy (d_values, additive.values, additive.count*sizeof(int), cudaMemcpyHostToDevice));
+
+	// a grid of zero blocks is an invalid launch configuration, so an empty RIV skips
+	// the kernel and the dense block is returned unchanged
+	if(additive.count > 0){
+		S2Ds<<<additive.count,1>>>(RIVKeyData.d_OpenSlot, d_locations, d_values, additive.count, RIVCount, RIVKeyData.RIVsize);
+		// kernel launches do not return a status; configuration errors only surface here
+		HANDLE_ERROR (cudaGetLastError());
+	}
+
+	HANDLE_ERROR (cudaMemcpy (denseBlock, RIVKeyData.d_OpenSlot, RIVCount*RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
+
+}