Commit d78631fd by simetk

sparse push/pull and added beginnings of morphic RIVs

parent 9c6ceacf
......@@ -11,6 +11,10 @@
#define RIVSIZE 25000
#endif
/* compile-time check: RIVSIZE is used as an allocation length and as the
 * divisor in rand()%RIVSIZE, so 0 has to be rejected along with negatives.
 * the defined() test only keeps this guard inert when it is preprocessed
 * without a RIVSIZE definition in scope */
#if defined(RIVSIZE) && RIVSIZE<=0
#error "RIVSIZE must be a positive number (preferably a large positive)"
#endif
/* NONZEROS macro defines the number of non-zero values that will be generated
* for any level one (barcode) RIV. 2 is a simple, lightweight starting value
*/
......@@ -18,6 +22,11 @@
#define NONZEROS 2
#endif
/* makeSparseLocations writes locations two per loop pass, so the count has to
 * be even as well as at least 1 */
#if (NONZEROS < 1) || (NONZEROS % 2 != 0)
#error "NONZEROS must be an even, greater than 0 number"
#endif
/* CACHESIZE macro defines the number of RIVs the system will cache.
* a larger cache means more memory consumption, but will also be significantly
* faster in aggregation and reading applications. doesn't affect systems
......@@ -27,6 +36,11 @@
#define CACHESIZE 20
#endif
/* compile-time check: only a negative CACHESIZE is rejected, 0 gets through */
#if (CACHESIZE) < 0
#error "CACHESIZE cannot be a negative number"
#endif
/* the sparseRIV is a RIV form optimized for RIVs that will be mostly 0s
* as this is often an ideal case, it is advisable as the default
* unless we are doing long term RIV aggregation.
......@@ -63,14 +77,11 @@ typedef struct{
* memory blocks which the system will use in the background
*/
/* library-wide state shared by the RIV routines; the one static instance is RIVKey.
 * NOTE(review): RIVCache is declared twice in this listing (pointer and array);
 * presumably lines from before and after the commit are shown together -
 * confirm against the real header before compiling */
struct RIVData{
/* RIVInit assigns this from the RIVSIZE macro */
size_t RIVsize;
/* RIVInit assigns this from the NONZEROS macro */
int nonZeros;
/* RIVInit assigns sqrt(RIVSIZE) */
int I2SThreshold;
/* scratch block: RIVInit mallocs 3*RIVSIZE ints, RIVCleanup frees it */
int *h_tempBlock;
/* RIVInit sets this to 3*RIVSIZE, the int count malloc'd for h_tempBlock */
int tempSize;
/* zeroed in RIVInit; purpose not visible in this listing */
int thing;
/* cache of dense RIVs */
denseRIV* RIVCache;
/* RIVInit assigns this from the CACHESIZE macro */
int cacheSize;
denseRIV RIVCache[CACHESIZE];
}static RIVKey;
/* RIVInit should be the first function called in any usage of this library
......@@ -151,7 +162,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
int *destination = (int*)calloc(RIVKey.RIVsize,sizeof(int));
int *destination = (int*)calloc(RIVSIZE,sizeof(int));
int *locations_slider = locations;
int *locations_stop = locations_slider+valueCount;
......@@ -198,8 +209,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
}
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseRIV sparseOut;
int *locationsTemp = RIVKey.h_tempBlock+RIVKey.RIVsize;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVKey.RIVsize;
int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
sparseOut.count = 0;
int add = 1;
int found;
......@@ -229,11 +240,11 @@ sparseRIV consolidateD2S(int *denseInput){
sparseRIV output;
output.count = 0;
/* key/value pairs will be loaded to a worst-case sized temporary slot */
int* locations = RIVKey.h_tempBlock+RIVKey.RIVsize;
int* values = locations+RIVKey.RIVsize;
int* locations = RIVKey.h_tempBlock+RIVSIZE;
int* values = locations+RIVSIZE;
int* locations_slider = locations;
int* values_slider = values;
for(int i=0; i<RIVKey.RIVsize; i++){
for(int i=0; i<RIVSIZE; i++){
/* act only on non-zeros */
if(denseInput[i]){
......@@ -266,14 +277,7 @@ sparseRIV consolidateD2S(int *denseInput){
void RIVInit(){
RIVKey.RIVsize = RIVSIZE; //#TODO decide about macros vs global variables
RIVKey.nonZeros = NONZEROS;
RIVKey.I2SThreshold = sqrt(RIVSIZE);
if(RIVKey.nonZeros%2){
printf("your NONZEROS value must be an even number");
RIVKey.nonZeros++;
printf(", changed to %d", RIVKey.nonZeros);
}
/* open a slot at least large enough for worst case handling of
* sparse to dense conversion. may be enlarged by filetoL2 functions */
......@@ -284,12 +288,11 @@ void RIVInit(){
sigaction(11,&action,NULL);
//}
RIVKey.h_tempBlock = (int*)malloc(3*RIVKey.RIVsize*sizeof(int));
RIVKey.tempSize = 3*RIVKey.RIVsize;
RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
RIVKey.tempSize = 3*RIVSIZE;
RIVKey.thing = 0;
RIVKey.cacheSize = CACHESIZE;
/* open a slot for a cache of dense RIVs, optimized for frequent accesses */
RIVKey.RIVCache = (denseRIV*)calloc(RIVKey.cacheSize,sizeof(denseRIV));
memset(RIVKey.RIVCache, 0, sizeof(denseRIV)*CACHESIZE);
}
void RIVCleanup(){
......@@ -297,9 +300,6 @@ void RIVCleanup(){
if(cacheDump()){
puts("cache dump failed, some lexicon data was lost");
}
#if CACHESIZE > 0
free(RIVKey.RIVCache);
#endif
free(RIVKey.h_tempBlock);
}
......@@ -318,12 +318,12 @@ int wordtoSeed(unsigned char* word){
void makeSparseLocations(unsigned char* word, int *locations, size_t count){
locations+=count;
srand(wordtoSeed(word));
int *locations_stop = locations+RIVKey.nonZeros;
int *locations_stop = locations+NONZEROS;
while(locations<locations_stop){
/* unrolled for speed, guaranteed to be an even number of steps */
*locations = rand()%RIVKey.RIVsize;
*locations = rand()%RIVSIZE;
locations++;
*locations = rand()%RIVKey.RIVsize;
*locations = rand()%RIVSIZE;
locations++;
}
return;
......@@ -341,28 +341,39 @@ int fLexPush(denseRIV RIVout){
printf("lexicon push has failed for word: %s\nconsider cleaning inputs", pathString);
return 1;
}
fwrite(RIVout.frequency, 1, 4, lexWord);
fwrite(&RIVout.magnitude, 1, 4, lexWord);
fwrite(RIVout.values, RIVKey.RIVsize, 4, lexWord);
sparseRIV temp = consolidateD2S(RIVout.values);
fwrite(&temp.count, 1, sizeof(size_t), lexWord);
fwrite(RIVout.frequency, 1, sizeof(float), lexWord);
fwrite(&RIVout.magnitude, 1, sizeof(float), lexWord);
fwrite(temp.locations, temp.count, sizeof(int), lexWord);
fwrite(temp.values, temp.count, sizeof(int), lexWord);
fclose(lexWord);
free(RIVout.values);
free(temp.locations);
return 0;
}
denseRIV fLexPull(FILE* lexWord){
denseRIV output;
output.values = malloc( (RIVKey.RIVsize+1) *sizeof(int));
output.frequency = output.values+RIVKey.RIVsize;
int diagnostic = 0;
diagnostic += fread(output.frequency, 1, sizeof(int), lexWord);
diagnostic += fread(&(output.magnitude), 1, sizeof(int), lexWord);
diagnostic += fread(output.values, RIVKey.RIVsize, sizeof(int), lexWord);
sparseRIV temp;
if(diagnostic != (RIVKey.RIVsize+2)){
output.magnitude = -1;
}
output.values = calloc( (RIVSIZE+1) ,sizeof(int));
output.frequency = output.values+RIVSIZE;
int diagnostic = 0;
fread(&temp.count, 1, sizeof(size_t), lexWord);
diagnostic += fread(&temp.frequency, 1, sizeof(int), lexWord);
diagnostic += fread(&(temp.magnitude), 1, sizeof(int), lexWord);
temp.locations = malloc(temp.count*2*sizeof(int));
temp.values = temp.locations+temp.count;
diagnostic += fread(temp.locations, temp.count, sizeof(int), lexWord);
diagnostic += fread(temp.values, temp.count, sizeof(int), lexWord);
addS2D(output.values, temp);
*(output.frequency) = temp.frequency;
output.magnitude = temp.magnitude;
free(temp.locations);
output.cached = 0;
return output;
......@@ -381,7 +392,7 @@ void signalSecure(int signum, siginfo_t *si, void* arg){
int cacheDump(){
int flag = 0;
denseRIV* cache_slider = RIVKey.RIVCache;
denseRIV* cache_stop = RIVKey.RIVCache+RIVKey.cacheSize;
denseRIV* cache_stop = RIVKey.RIVCache+CACHESIZE;
while(cache_slider<cache_stop){
if((*cache_slider).cached){
flag += fLexPush(*cache_slider);
......@@ -394,9 +405,9 @@ int cacheDump(){
denseRIV denseAllocate(){
/* allocates a 0 vector */
denseRIV output;
output.values = calloc(RIVKey.RIVsize+1, sizeof(int));
output.values = calloc(RIVSIZE+1, sizeof(int));
/* for compact memory use, frequency is placed immediately after values */
output.frequency = output.values+RIVKey.RIVsize;
output.frequency = output.values+RIVSIZE;
output.magnitude = 0;
output.cached = 0;
return output;
......
No preview for this file type
......@@ -27,9 +27,9 @@
#define CACHESIZE 20
#endif
#define CACHED = 0x01
#define SPARSE = 0x02
#define AVAILABLE = 0x04
#define CACHED 0x02
#define SPARSE 0x01
#define AVAILABLE 0x04
typedef struct{
char name[100];
......@@ -38,7 +38,9 @@ typedef struct{
size_t count;
unsigned int* frequency;
float magnitude;
char flags;
int cached;
int boolean;
int flags;
}RIV;
......@@ -152,7 +154,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
int *destination = (int*)calloc(RIVKey.RIVsize,sizeof(int));
int *destination = (int*)calloc(RIVSIZE,sizeof(int));
int *locations_slider = locations;
int *locations_stop = locations_slider+valueCount;
......@@ -199,8 +201,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
}
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseRIV sparseOut;
int *locationsTemp = RIVKey.h_tempBlock+RIVKey.RIVsize;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVKey.RIVsize;
int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
sparseOut.count = 0;
int add = 1;
int found;
......@@ -224,17 +226,18 @@ sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseOut.locations = malloc(2*sparseOut.count*sizeof(int));
sparseOut.values = sparseOut.locations+sparseOut.count;
memcpy(sparseOut.locations, locationsTemp, 2*sparseOut.count*sizeof(int));
sparseOut.flags |= SPARSE;
return sparseOut;
}
sparseRIV consolidateD2S(int *denseInput){
sparseRIV output;
output.count = 0;
/* key/value pairs will be loaded to a worst-case sized temporary slot */
int* locations = RIVKey.h_tempBlock+RIVKey.RIVsize;
int* values = locations+RIVKey.RIVsize;
int* locations = RIVKey.h_tempBlock+RIVSIZE;
int* values = locations+RIVSIZE;
int* locations_slider = locations;
int* values_slider = values;
for(int i=0; i<RIVKey.RIVsize; i++){
for(int i=0; i<RIVSIZE; i++){
/* act only on non-zeros */
if(denseInput[i]){
......@@ -261,20 +264,14 @@ sparseRIV consolidateD2S(int *denseInput){
/* copy values into opened slot */
memcpy(output.values, values, output.count*sizeof(int));
output.flags |= SPARSE;
return output;
}
void RIVInit(){
RIVKey.RIVsize = RIVSIZE; //#TODO decide about macros vs global variables
RIVKey.nonZeros = NONZEROS;
RIVKey.I2SThreshold = sqrt(RIVSIZE);
if(RIVKey.nonZeros%2){
printf("your NONZEROS value must be an even number");
RIVKey.nonZeros++;
printf(", changed to %d", RIVKey.nonZeros);
}
/* open a slot at least large enough for worst case handling of
* sparse to dense conversion. may be enlarged by filetoL2 functions */
......@@ -285,8 +282,8 @@ void RIVInit(){
sigaction(11,&action,NULL);
//}
RIVKey.h_tempBlock = (int*)malloc(3*RIVKey.RIVsize*sizeof(int));
RIVKey.tempSize = 3*RIVKey.RIVsize;
RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
RIVKey.tempSize = 3*RIVSIZE;
RIVKey.thing = 0;
RIVKey.cacheSize = CACHESIZE;
/* open a slot for a cache of dense RIVs, optimized for frequent accesses */
......@@ -319,12 +316,12 @@ int wordtoSeed(unsigned char* word){
void makeSparseLocations(unsigned char* word, int *locations, size_t count){
locations+=count;
srand(wordtoSeed(word));
int *locations_stop = locations+RIVKey.nonZeros;
int *locations_stop = locations+NONZEROS;
while(locations<locations_stop){
/* unrolled for speed, guaranteed to be an even number of steps */
*locations = rand()%RIVKey.RIVsize;
*locations = rand()%RIVSIZE;
locations++;
*locations = rand()%RIVKey.RIVsize;
*locations = rand()%RIVSIZE;
locations++;
}
return;
......@@ -345,7 +342,7 @@ int fLexPush(denseRIV RIVout){
fwrite(RIVout.frequency, 1, 4, lexWord);
fwrite(&RIVout.magnitude, 1, 4, lexWord);
fwrite(RIVout.values, RIVKey.RIVsize, 4, lexWord);
fwrite(RIVout.values, RIVSIZE, 4, lexWord);
fclose(lexWord);
free(RIVout.values);
......@@ -354,17 +351,17 @@ int fLexPush(denseRIV RIVout){
denseRIV fLexPull(FILE* lexWord){
denseRIV output;
output.values = malloc( (RIVKey.RIVsize+1) *sizeof(int));
output.frequency = output.values+RIVKey.RIVsize;
output.values = malloc( (RIVSIZE+1) *sizeof(int));
output.frequency = (unsigned int*)(output.values+RIVSIZE);
int diagnostic = 0;
diagnostic += fread(output.frequency, 1, sizeof(int), lexWord);
diagnostic += fread(&(output.magnitude), 1, sizeof(int), lexWord);
diagnostic += fread(output.values, RIVKey.RIVsize, sizeof(int), lexWord);
diagnostic += fread(output.values, RIVSIZE, sizeof(int), lexWord);
if(diagnostic != (RIVKey.RIVsize+2)){
if(diagnostic != (RIVSIZE+2)){
output.magnitude = -1;
}
output.flags = 0;
output.cached = 0;
return output;
}
......@@ -395,9 +392,9 @@ int cacheDump(){
denseRIV denseAllocate(){
/* allocates a 0 vector */
denseRIV output;
output.values = calloc(RIVKey.RIVsize+1, sizeof(int));
output.values = calloc(RIVSIZE+1, sizeof(int));
/* for compact memory use, frequency is placed immediately after values */
output.frequency = output.values+RIVKey.RIVsize;
output.frequency = (unsigned int*)(output.values+RIVSIZE);
output.magnitude = 0;
output.cached = 0;
return output;
......
No preview for this file type
No preview for this file type
......@@ -14,7 +14,7 @@
RIVKey.thing++; \
}\
}while(0)
#include "RIVtoolsCPUlinux.h"
#include "RIVtoolsMorphic.h"
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
......@@ -48,13 +48,13 @@ int main(int argc, char *argv[]){
float minmag;
float maxmag;
denseRIV baseDense;
baseDense.values = malloc(RIVKey.RIVsize*sizeof(int));
baseDense.values = malloc(RIVSIZE*sizeof(int));
fileRIVs_slider = fileRIVs;
sparseRIV* comparators_slider;
while(fileRIVs_slider<fileRIVs_stop){
comparators_slider = fileRIVs;
memset(baseDense.values, 0, RIVKey.RIVsize*sizeof(int));
memset(baseDense.values, 0, RIVSIZE*sizeof(int));
baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
baseDense.magnitude = (*fileRIVs_slider).magnitude;
minmag = baseDense.magnitude*.85;
......
No preview for this file type
No preview for this file type
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define CACHESIZE 10000
#define CACHESIZE 100000
#include "RIVtoolsCPUlinux.h"
#include <sys/stat.h>
#include <sys/types.h>
......@@ -84,7 +84,7 @@ void directoryGrind(char *rootString){
}
strcpy(pathString, rootString);
strcat(pathString, files->d_name);
//printf("%s\n", pathString);
printf("%s\n", pathString);
FILE *input = fopen(pathString, "r+");
if(input){
fileGrind(input);
......
File added
......@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){
break;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
/* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations((unsigned char*)word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
output.frequency = locationCount/NONZEROS;
output.boolean = 1;
return output;
}
......@@ -92,24 +92,24 @@ sparseRIV fileToL2(FILE *data){
break;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
/* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
output.frequency = locationCount/NONZEROS;
output.boolean = 1;
return output;
......@@ -137,22 +137,22 @@ sparseRIV fileToL2Clean(FILE *data){
if(!isWordClean((char*)word)){
continue;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
output.frequency = locationCount/NONZEROS;
output.boolean = 1;
return output;
}
......@@ -169,10 +169,10 @@ void aggregateWord2D(denseRIV destination, char* word){
//makeSparseLocations((unsigned char*)word, locationSlot, 0);
srand(wordtoSeed((unsigned char*)word));
for(int i=0; i<RIVKey.nonZeros; i++){
for(int i=0; i<NONZEROS; i++){
destination.values[(rand()%RIVKey.RIVsize)] +=1;
destination.values[(rand()%RIVKey.RIVsize)] -= 1;
destination.values[(rand()%RIVSIZE)] +=1;
destination.values[(rand()%RIVSIZE)] -= 1;
}
}
......@@ -214,7 +214,7 @@ denseRIV lexPull(char* word){
/* if there is a cache, first check if the word is cached */
srand(wordtoSeed((unsigned char*)word));
int hash = rand()%RIVKey.cacheSize;
int hash = rand()%CACHESIZE;
if(!strcmp(word, RIVKey.RIVCache[hash].name)){
/* if word is cached, pull from cache and exit */
......@@ -259,7 +259,7 @@ int lexPush(denseRIV RIVout){
}
srand(wordtoSeed((unsigned char*)RIVout.name));
int hash = rand()%RIVKey.cacheSize;
int hash = rand()%CACHESIZE;
if(!RIVKey.RIVCache[hash].cached){
RIVKey.RIVCache[hash] = RIVout;
......
No preview for this file type
......@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "RIVLower.h"
#include "RIVLowerMorphic.h"
#include "RIVaccessories.h"
/* lexPush writes a denseRIV to a file for permanent storage */
......@@ -39,7 +39,7 @@ sparseRIV text2L2(char *text){
/* locations (implicit RIV) are temp stored in temp block, and moved
* to permanent home in consolidation */
int *locations = RIVKey.h_tempBlock;
int locationCount = 0;
unsigned int locationCount = 0;
int displacement;
while(sscanf(text, "%99s%n", word, &displacement)){
......@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){
break;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
/* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations((unsigned char*)word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
*(output.frequency) = locationCount/NONZEROS;
output.boolean = 1;
return output;
}
......@@ -92,26 +92,25 @@ sparseRIV fileToL2(FILE *data){
break;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
/* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
output.frequency = malloc(1*sizeof(int));
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
*(output.frequency) = locationCount/NONZEROS;
output.boolean = 1;
return output;
}
......@@ -137,22 +136,22 @@ sparseRIV fileToL2Clean(FILE *data){
if(!isWordClean((char*)word)){
continue;
}
blockSize = locationCount+RIVKey.nonZeros;
blockSize = locationCount+NONZEROS;
if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros;
RIVKey.tempSize+=NONZEROS;
}
makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros;
locationCount+= NONZEROS;
}
sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros;
*(output.frequency) = locationCount/NONZEROS;
output.boolean = 1;
return output;
}
......@@ -169,10 +168,10 @@ void aggregateWord2D(denseRIV destination, char* word){
//makeSparseLocations((unsigned char*)word, locationSlot, 0);
srand(wordtoSeed((unsigned char*)word));
for(int i=0; i<RIVKey.nonZeros; i++){
for(int i=0; i<NONZEROS; i++){
destination.values[(rand()%RIVKey.RIVsize)] +=1;
destination.values[(rand()%RIVKey.RIVsize)] -= 1;
destination.values[(rand()%RIVSIZE)] +=1;
destination.values[(rand()%RIVSIZE)] -= 1;
}
}
......@@ -195,10 +194,17 @@ float cosCompare(denseRIV baseRIV, sparseRIV comparator){
return cosine;
}
float getMagnitudeSparse(sparseRIV input){
float getMagnitudeSparse(RIV input){
size_t count;
if(input.flags & SPARSE){
count = input.count;
}else{
count = RIVSIZE;
}
unsigned long long int temp = 0;
int *values = input.values;
int *values_stop = values+input.count;
int *values_stop = values+count;
while(values<values_stop){
temp += (*values)*(*values);
values++;
......@@ -309,7 +315,7 @@ sparseRIV fileToL2direct(FILE *data){;
sparseRIV output = consolidateD2S(denseTemp.values);
// frequency records the number of words in this file
output.frequency = count;
*(output.frequency) = count;
output.boolean = 1;
return output;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment