Commit 49b28180 by Ethan

added caching

parent c42a95f1
Showing with 553 additions and 91 deletions

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
/* RIVSIZE macro defines the dimensionality of the RIVs we will use
* 25000 is the standard, but can be redefined specifically
*/
......@@ -50,6 +52,7 @@ typedef struct{
int* values;
int* frequency;
float magnitude;
int cached;
}denseRIV;
/*RIVKey, holds globally important data that should not be changed partway through
......@@ -74,7 +77,7 @@ struct RIVData{
* the functions to use, so that we can move fast with rare allocations.
* #TODO add signal redefinitions so that cache is saved even on crash
*/
void RIVinit();
void RIVInit();
/* RIVCleanup should always be called to close a RIV program. it frees
* blocks allocated by RIVinit, and dumps the cached data to appropriate lexicon files
......@@ -107,7 +110,9 @@ void makesparseLocations(unsigned char* word, int *seeds, size_t seedCount);
*/
int fLexPush(denseRIV RIVout);
denseRIV fLexPull(FILE* lexWord, denseRIV output);
/* creates a standard seed from the characters in a word, hopefully unique */
int wordtoSeed(unsigned char* word);
/* mapI2D maps an "implicit RIV" that is, an array of index values,
......@@ -116,6 +121,10 @@ int wordtoSeed(unsigned char* word);
*/
int* mapI2D(int *locations, size_t seedCount);
int* addI2D(int* destination, int* locations, size_t seedCount);
int cacheDump();
void signalSecure(int signum, siginfo_t *si, void* arg);
/* begin definitions */
int* mapS2D(int* destination, sparseRIV input){// #TODO fix destination parameter vs calloc of destination
......@@ -136,6 +145,21 @@ int* mapS2D(int* destination, sparseRIV input){// #TODO fix destination paramete
return destination;
}
/* addS2D accumulates a sparse RIV into an already existing dense vector.
 * For each of the input.count entries, the value in input.values is added
 * to destination at the index given by the matching entry of input.locations.
 * destination is modified in place and returned; the indexes are not
 * range-checked here.
 */
int* addS2D(int* destination, sparseRIV input){
	int *const first = input.locations;
	int *const last = first + input.count;

	/* walk locations and values in lockstep */
	for(int *where = first, *what = input.values; where < last; ++where, ++what){
		destination[*where] += *what;
	}
	return destination;
}
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
int *destination = (int*)calloc(RIVKey.RIVsize,sizeof(int));
int *locations_slider = locations;
......@@ -153,6 +177,22 @@ int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination paramete
return destination;
}
/* addI2D adds an "implicit RIV" (an array of index values) into an already
 * existing dense vector. Locations are consumed in pairs: the index in the
 * first slot of each pair gets +1, the index in the second slot gets -1.
 *
 * destination: dense vector, modified in place; the indexes in locations
 *              are not range-checked against its size
 * locations:   array of valueCount indexes into destination
 * valueCount:  number of entries in locations
 * returns destination
 *
 * An odd valueCount previously made the loop read one entry past the end
 * of locations; the unpaired final location now simply receives its +1.
 */
int* addI2D(int* destination, int *locations, size_t valueCount){
	size_t i = 0;

	/* full (+1, -1) pairs */
	for(; i+1 < valueCount; i += 2){
		destination[locations[i]] += 1;
		destination[locations[i+1]] -= 1;
	}
	/* leftover location when valueCount is odd */
	if(i < valueCount){
		destination[locations[i]] += 1;
	}
	return destination;
}
sparseRIV consolidateD2S(int *denseInput){
sparseRIV output;
output.count = 0;
......@@ -193,7 +233,7 @@ sparseRIV consolidateD2S(int *denseInput){
}
void RIVinit(){
void RIVInit(){
RIVKey.RIVsize = RIVSIZE; //#TODO decide about macros vs global variables
RIVKey.nonZeros = NONZEROS;
......@@ -205,6 +245,13 @@ void RIVinit(){
/* open a slot at least large enough for worst case handling of
* sparse to dense conversion. may be enlarged by filetoL2 functions */
struct sigaction action;
action.sa_sigaction = signalSecure;
action.sa_flags = SA_SIGINFO;
for(int i=1; i<27; i++){
sigaction(i,&action,NULL);
}
RIVKey.h_tempBlock = (int*)malloc(3*RIVKey.RIVsize*sizeof(int));
RIVKey.tempSize = 3*RIVKey.RIVsize;
RIVKey.thing = 0;
......@@ -215,8 +262,8 @@ void RIVinit(){
void RIVCleanup(){
for(int i=0; i<RIVKey.cacheSize; i++){
fLexPush(RIVKey.RIVCache[i]);
if(cacheDump()){
puts("cache dump failed, some lexicon data was lost");
}
#if CACHESIZE > 0
free(RIVKey.RIVCache);
......@@ -251,10 +298,10 @@ void makeSparseLocations(unsigned char* word, int *locations, size_t count){
}
int fLexPush(denseRIV RIVout){
char pathString[500] = {0};
char pathString[200] = {0};
/* word data will be placed in a (new?) file under the lexicon directory
* and named after the word itself */
* in a file named after the word itself */
sprintf(pathString, "lexicon/%s", RIVout.name);
FILE *lexWord = fopen(pathString, "wb");
......@@ -268,5 +315,44 @@ int fLexPush(denseRIV RIVout){
fwrite(RIVout.values, RIVKey.RIVsize, 4, lexWord);
fclose(lexWord);
free(RIVout.values);
return 0;
}
/* fLexPull reads one lexicon entry from an open file into output.
 * The data is read in this order: one int into *output.frequency, one
 * value into output.magnitude, then RIVKey.RIVsize ints into output.values.
 * output.frequency and output.values must already point at storage large
 * enough to receive that data.
 *
 * lexWord: open lexicon file; it is always closed before returning
 * output:  the RIV to fill
 * returns the filled RIV; if the file held fewer items than expected,
 *         magnitude is set to -1 to mark the entry as unreadable
 *
 * fread returns the number of ITEMS read, so each call passes the element
 * size first and the element count second. With the two swapped the total
 * could never equal RIVsize+2 and every entry was reported as unreadable.
 */
denseRIV fLexPull(FILE* lexWord, denseRIV output){
	size_t itemsRead = 0;

	itemsRead += fread(output.frequency, sizeof(int), 1, lexWord);
	itemsRead += fread(&(output.magnitude), sizeof(output.magnitude), 1, lexWord);
	itemsRead += fread(output.values, sizeof(int), RIVKey.RIVsize, lexWord);
	fclose(lexWord);

	if(itemsRead != (size_t)RIVKey.RIVsize + 2){
		output.magnitude = -1;
	}
	return output;
}
/* signalSecure is the sigaction-style handler that RIVInit installs.
 * It dumps the cache through cacheDump, prints whether the dump worked,
 * then resets the signal to its default disposition and sends the same
 * signal to this process again.
 *
 * signum: the signal being handled
 * si, arg: required by the SA_SIGINFO handler signature, not used
 */
void signalSecure(int signum, siginfo_t *si, void* arg){
	(void)si;
	(void)arg;

	int dumpFailed = cacheDump();
	puts(dumpFailed ? "cache dump failed, some lexicon data lost"
	                : "cache dumped successfully");

	/* hand the signal back to the default handler */
	signal(signum, SIG_DFL);
	kill(getpid(), signum);
}
/* cacheDump walks the RIVKey.cacheSize entries of RIVKey.RIVCache and
 * pushes every entry whose cached field is set to its lexicon file
 * with fLexPush.
 *
 * returns 0 if every push reported success, 1 if at least one did not.
 *
 * The result of fLexPush used to be discarded, so this function always
 * returned 0 and callers could never see a failed dump.
 * NOTE(review): assumes fLexPush returns nonzero on failure - confirm
 * against its full definition.
 */
int cacheDump(){
	int flag = 0;
	denseRIV* cache_stop = RIVKey.RIVCache+RIVKey.cacheSize;

	for(denseRIV* cache_slider = RIVKey.RIVCache; cache_slider<cache_stop; cache_slider++){
		/* keep going after a failure so the remaining entries are still saved */
		if(cache_slider->cached && fLexPush(*cache_slider)){
			flag = 1;
		}
	}
	return flag;
}
File added
File added
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <time.h>
#define RIVSIZE 5
#define CACHESIZE 0
#define THRESHOLD 0.70
#include "RIVtoolsCPUlinux.h"
void getcentroids(sparseRIV* centroids, sparseRIV* vectorSet, int centroidCount, int vectorCount);
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
/* Entry point of the centroid experiment.
 * Takes a directory as its first argument, turns every file below it into
 * a sparse RIV (directoryToL2s), builds five seed RIVs from fixed words
 * (wordtoL2), runs getcentroids over them and prints timing information.
 *
 * returns 0 on success, 1 when no directory was given, the directory path
 *         does not fit the path buffer, or the initial allocation fails
 *
 * The argument check now happens before anything is allocated, and the
 * root path is built with a bounded snprintf instead of strcpy/strcat so
 * an overlong argument can no longer overflow rootString.
 */
int main(int argc, char *argv[]){
	enum { CENTROIDS = 5 };
	static const char *const seedWords[CENTROIDS] = {"boobs", "ass", "shit", "cocks", "fuck"};

	clock_t begintotal = clock();

	if(argc <2){
		printf("give me a directory");
		return 1;
	}

	char rootString[2000];
	int rootLength = snprintf(rootString, sizeof rootString, "%s/", argv[1]);
	if(rootLength < 0 || (size_t)rootLength >= sizeof rootString){
		printf("directory path is too long\n");
		return 1;
	}

	int fileCount = 0;
	RIVInit();
	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
	if(!fileRIVs){
		printf("could not allocate memory for file RIVs\n");
		return 1;
	}

	directoryToL2s(rootString, &fileRIVs, &fileCount);
	printf("fileCount: %d\n", fileCount);
	getMagnitudes(fileRIVs, fileCount);

	clock_t beginnsquared = clock();
	sparseRIV centroids[CENTROIDS];
	for(int i=0; i<CENTROIDS; i++){
		/* the name is written first because wordtoL2 is handed that buffer */
		strcpy(centroids[i].name, seedWords[i]);
		centroids[i] = wordtoL2(centroids[i].name);
	}
	getMagnitudes(centroids, CENTROIDS);
	getcentroids(centroids, fileRIVs, CENTROIDS, fileCount);
	clock_t endnsquared = clock();

	double nsquaredSeconds = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
	printf("nsquared time:%lf\n\n", nsquaredSeconds);
	printf("%d <", RIVKey.thing);

	clock_t endtotal = clock();
	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
	printf("total time:%lf\n\n", time_spent);

	free(fileRIVs);
	return 0;
}
/* directoryToL2s walks the directory rootString recursively and appends
 * one sparse RIV per regular entry to the array *fileRIVs.
 *
 * rootString: directory path, expected to end in '/' because entry names
 *             are appended to it directly
 * fileRIVs:   address of the growing RIV array; reallocated as needed, so
 *             the caller's pointer may change
 * fileCount:  address of the number of RIVs stored so far; incremented
 *             once per file added
 *
 * Entries whose name starts with '.' are skipped. Each file is read with
 * fileToL2Clean and its path is copied into the RIV's name field.
 *
 * Fixes over the earlier version:
 *  - after recursing into a subdirectory the loop moves on, instead of
 *    also opening the directory itself as if it were a file
 *  - the DIR handle is closed on every exit path
 *  - paths are built with a bounded snprintf; entries that do not fit
 *    are skipped
 *  - a failed realloc no longer loses the existing array
 * NOTE(review): the size of the name field is not visible here, so the
 * strcpy into it is still unbounded - confirm it can hold a full path.
 */
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
	char pathString[2000];
	DIR *directory = opendir(rootString);
	struct dirent *files = 0;

	if(!directory){
		printf("location not found, %s\n", rootString);
		return;
	}

	while((files=readdir(directory))){
		if(*(files->d_name) == '.') continue;

		int isDirectory = (files->d_type == DT_DIR);
		int length = snprintf(pathString, sizeof pathString, "%s%s%s",
		                      rootString, files->d_name, isDirectory ? "/" : "");
		if(length < 0 || (size_t)length >= sizeof pathString){
			printf("path too long, skipping %s%s\n", rootString, files->d_name);
			continue;
		}

		if(isDirectory){
			directoryToL2s(pathString, fileRIVs, fileCount);
			continue;
		}

		FILE *input = fopen(pathString, "r");
		if(!input){
			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
			break;
		}

		/* grow through a temporary so the old array survives a failure */
		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((size_t)(*fileCount)+1)*sizeof(sparseRIV));
		if(!grown){
			printf("out of memory while adding %s\n", pathString);
			fclose(input);
			break;
		}
		(*fileRIVs) = grown;

		grown[(*fileCount)] = fileToL2Clean(input);
		strcpy(grown[(*fileCount)].name, pathString);
		fclose(input);
		(*fileCount)++;
	}
	closedir(directory);
}
/* getcentroids assigns every vector in vectorSet to one of the centroids
 * and sums the members of each group into a dense vector, printing the
 * cosines and the running dense sums as it goes.
 *
 * centroids:     array of centroidCount seed RIVs
 * vectorSet:     array of vectorCount RIVs to distribute
 * centroidCount: number of centroids
 * vectorCount:   number of vectors
 *
 * Nothing is written back to centroids; the dense sums are only printed.
 * Returns immediately when either count is not positive.
 *
 * Fixes over the earlier version:
 *  - the per-centroid member counts start at zero (they were uninitialized)
 *  - the accumulation loop advances j (it incremented i) and adds the
 *    vector that was actually assigned to the centroid, not vectorSet[j]
 *  - the dense sums are fully zeroed (memset was given an element count
 *    where a byte count was needed)
 *  - the working buffers are released before returning
 * NOTE(review): a vector goes to the centroid with the SMALLEST absolute
 * cosine; kept as written - confirm that this is the intended rule.
 * NOTE(review): the arrays returned by cosineCompare are not freed here
 * because their ownership is not visible from this file.
 */
void getcentroids(sparseRIV* centroids, sparseRIV* vectorSet, int centroidCount, int vectorCount){
	if(centroidCount <= 0 || vectorCount <= 0){
		return;
	}

	float **cosines = malloc((size_t)centroidCount * sizeof *cosines);
	/* row i of memberIndexes lists the vectors assigned to centroid i */
	int *memberIndexes = calloc((size_t)vectorCount*centroidCount, sizeof(int));
	int *memberCounts = calloc((size_t)centroidCount, sizeof(int));
	/* row i of denseBlock is the dense sum for centroid i, zeroed by calloc */
	int *denseBlock = calloc((size_t)RIVKey.RIVsize*centroidCount, sizeof(int));

	if(!cosines || !memberIndexes || !memberCounts || !denseBlock){
		printf("getcentroids: out of memory\n");
		free(denseBlock);
		free(memberCounts);
		free(memberIndexes);
		free(cosines);
		return;
	}

	for(int i=0; i<centroidCount; i++){
		cosines[i] = cosineCompare(centroids[i], vectorSet, vectorCount);
	}

	float token = 2.0;
	int counter = 0;
	for(int i=0; i<vectorCount; i++){
		token = 2.0;
		printf("\nfor vector %d:\n", i);
		for(int j = 0; j<centroidCount; j++){
			printf("centroid %d: %f", j, cosines[j][i]);
			if(fabsf(cosines[j][i])< token){
				token = fabsf(cosines[j][i]);
				counter = j;
			}
		}
		memberIndexes[(size_t)counter*vectorCount + memberCounts[counter]] = i;
		memberCounts[counter] += 1;
	}

	for(int i=0; i<centroidCount; i++){
		int *dense = denseBlock + (size_t)i*RIVKey.RIVsize;
		int *members = memberIndexes + (size_t)i*vectorCount;
		printf("\n\nnumber %d\n", i);
		for(int j=0; j<memberCounts[i]; j++){
			addS2D(dense, vectorSet[members[j]]);
			for(int k=0; k<RIVKey.RIVsize; k++){
				printf("%d, ", dense[k]);
			}
		}
	}

	free(denseBlock);
	free(memberCounts);
	free(memberIndexes);
	free(cosines);
}
File added
No preview for this file type
File added
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>
#include <error.h>
#include <time.h>
#define RIVSIZE 5000
#define CACHESIZE 0
#define THRESHOLD 0.70
#define COSINEACTION if(cosSim > THRESHOLD){ printf("%s\t%s\n%f\n", baseRIV.name, (*multipliers).name, cosSim); (*multipliers).boolean = 0; RIVKey.thing++; }
#define COSINEACTION do {\
if(cosine > THRESHOLD){ \
printf("%s\t%s\n%f\n", baseRIV.name, (*multipliers).name, cosine);\
(*multipliers).boolean = 0; \
RIVKey.thing++; \
}\
}while(0)
#include "RIVtoolsCPUlinux.h"
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
int main(int argc, char *argv[]){
clock_t begintotal = clock();
int fileCount = 0;
RIVinit();
RIVInit();
sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
char rootString[2000];
if(argc <1){
if(argc <2){
printf("give me a directory");
return 1;
}
......@@ -32,10 +36,9 @@ int main(int argc, char *argv[]){
getMagnitudes(fileRIVs, fileCount);
clock_t beginnsquared = clock();
printf("got past magnitudes");
for(int i=0; i<fileCount; i++){
for(int i=1; i<fileCount; i++){
if(fileRIVs[i].boolean){
cosineCompare(fileRIVs[i], fileRIVs+i+1, fileCount-(i+1));
cosineCompare(fileRIVs[i], fileRIVs, i);
}
}
......
No preview for this file type
File added
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define CACHESIZE 100
#define CACHESIZE 1000
#include "RIVtoolsCPUlinux.h"
#include <sys/stat.h>
#include <sys/types.h>
......@@ -17,7 +17,7 @@ void directoryGrind(char *rootString);
int main(int argc, char *argv[]){
clock_t begintotal = clock();
setKeyData();
RIVInit();
char pathString[1000];
strcpy(pathString, argv[1]);
strcat(pathString, "/");
......@@ -26,15 +26,10 @@ int main(int argc, char *argv[]){
clock_t endtotal = clock();
double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
printf("total time:%lf\n\n", time_spent);
for( int i=0; i<RIVKey.cacheSize; i++){
printf("%s, %d", RIVKey.RIVCache[i].name, *(RIVKey.RIVCache[i].frequency));
printf("\n");
}
RIVCleanup();
return 0;
}
void addS2Ds(denseRIV *denseSet, sparseRIV additive, int RIVCount){
denseRIV *denseSet_slider;
denseRIV *dense_stop = denseSet+RIVCount;
......@@ -89,7 +84,7 @@ void directoryGrind(char *rootString){
}
strcpy(pathString, rootString);
strcat(pathString, files->d_name);
printf("%s\n", pathString);
//printf("%s\n", pathString);
FILE *input = fopen(pathString, "r+");
if(input){
fileGrind(input);
......@@ -114,15 +109,19 @@ void fileGrind(FILE* textFile){
if(!isWordClean((char*)word)){
continue;
}
if(!checkDupe(RIVArray, word, wordCount)){
if(checkDupe(RIVArray, word, wordCount)){
continue;
}
RIVArray[wordCount] = lexPull(word);
if(!*((RIVArray[wordCount].name))) break;
int* thing = RIVArray[wordCount].frequency;
*thing = *thing + 1;
//printf("%s, %d, %d\n", RIVArray[wordCount].name, *(RIVArray[wordCount].frequency), *thing);
wordCount++;
}
}
//printf("%d\n", wordCount);
......@@ -136,5 +135,5 @@ void fileGrind(FILE* textFile){
}
free(RIVArray);
free(aggregateRIV.locations);
free(aggregateRIV.values);
//free(aggregateRIV.values);
}
File added
No preview for this file type
project gutenberg scientific american supplement no
\ No newline at end of file
title scientific american supplement no february
\ No newline at end of file
rate feet work
\ No newline at end of file
cities herculaneum pompeii several smaller towns on slope
\ No newline at end of file
mountain destroyed lava buried under mass
\ No newline at end of file
pumice stones ashes second
\ No newline at end of file
contiguous mountains in iceland in two enormous lava
\ No newline at end of file
streams one miles wide over ft deep other
\ No newline at end of file
scarcely inferior flowed first miles other till
\ No newline at end of file
reached sea pouring flood white hot lava
\ No newline at end of file
ocean destroying in paths killing in water
\ No newline at end of file
ocean fish mainstay inhabitants who
\ No newline at end of file
reduced disaster directly indirectly less
\ No newline at end of file
former strength third
\ No newline at end of file
in devastated such an immense area in java but all
\ No newline at end of file
eruptions known besides as mere childs play terrible one
\ No newline at end of file
krakatoa in
\ No newline at end of file
reader will examine map east indies will find
\ No newline at end of file
represented in straits lie between sumatra
\ No newline at end of file
java little island krakatoa in maps make before will
\ No newline at end of file
hunt in vain name like bull run before then
\ No newline at end of file
unknown fame though navigators who passed through straits knew
\ No newline at end of file
vi educational competitive examinations
\ No newline at end of file
as beautiful tropical isle an extinct volcanic cone in
\ No newline at end of file
center in beginning however little well behaved
\ No newline at end of file
island showed symptoms wrath boded no good larger
\ No newline at end of file
islands in vicinity noted fine fruits
\ No newline at end of file
abounded famous picnic ground towns cities even
\ No newline at end of file
miles away subterranean rumblings mutterings
\ No newline at end of file
wrath became conspicuous people capital java
\ No newline at end of file
put steamboat requisition visited island in large
\ No newline at end of file
number time island constantly in slight tremor
\ No newline at end of file
subterranean roar like continue but distant mutterings
\ No newline at end of file
interesting details famous examinations
\ No newline at end of file
thunder but crisis reached august am
\ No newline at end of file
beautiful sunday morning water straits
\ No newline at end of file
like sea glass as clear as crystal john in
\ No newline at end of file
apocalyptic vision speaks beauty morning enhanced
\ No newline at end of file
extraordinary transparency tropical air distant mountain
\ No newline at end of file
ranges seem so near seem possible strike
\ No newline at end of file
stone cast hand only mysterious rumblings mutterings
\ No newline at end of file
pent up forces beneath island disturbed breathless calm
\ No newline at end of file
silence lay on calm before terrible
\ No newline at end of file
mightiest most awful on record burst forth sudden
\ No newline at end of file
consequences overworked competitors
\ No newline at end of file
night snatched away day eyes terrified beholders on
\ No newline at end of file
mainland but vivid play lightnings around ascending
\ No newline at end of file
column dust penetrated even deep obscurity distance
\ No newline at end of file
miles awful darkness stretched within circle diameter
\ No newline at end of file
miles while more less darkness reigned within circle
\ No newline at end of file
diameter three times as great within latter area dust
\ No newline at end of file
fall like snow sky breaking off limbs trees its weight
\ No newline at end of file
miles distant while in miles away scene
\ No newline at end of file
disaster fall depth several inches explosions
\ No newline at end of file
so loud as distinctly heard in miles away
\ No newline at end of file
sound like constant roar cannon in field
\ No newline at end of file
battle finally whole island blown pieces now came
\ No newline at end of file
most awful contest battle death between neptune
\ No newline at end of file
vulcan sea poured down chasm millions tons only
\ No newline at end of file
first converted vapor millions tons
\ No newline at end of file
seething white hot lava beneath over shores miles away waves
\ No newline at end of file
over ft high rolled such fury even
\ No newline at end of file
part bedrock swept away blocks stone tons
\ No newline at end of file
weight carry two miles inland on sumatra side
\ No newline at end of file
straits large vessel carry three miles inland wave
\ No newline at end of file
vii electrical speed engine
\ No newline at end of file
course growing less in intensity traveled across whole indian
\ No newline at end of file
ocean miles cape good hope around
\ No newline at end of file
atlantic waves in atmosphere traveled around globe three
\ No newline at end of file
times rate miles hour dust volcano
\ No newline at end of file
carry up atmosphere fully twenty miles finest
\ No newline at end of file
distribute through whole body air reader doubtless
\ No newline at end of file
remembers beautiful reddish purple glow sunrise sunset
\ No newline at end of file
fully six months after august glow caused
\ No newline at end of file
volcanic dust in atmosphere interfering passage
\ No newline at end of file
suns rays upper part solar spectrum more manifest
\ No newline at end of file
high speed compound engine running revolutions
\ No newline at end of file
sun rising setting other times day
\ No newline at end of file
periods suns rays have travel obliquely through
\ No newline at end of file
atmosphere consequently penetrating very deep layer
\ No newline at end of file
deprived all colors except red
\ No newline at end of file
loss life appalling last sight on earth
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment