00001 #pragma warning( disable : 4786 )
00002 #include "compression.h"
00003 #ifndef GROUPINDEX
00004 #include "GroupIndex.h"
00005 #endif
00006
00007 namespace groupindex
00008 {
00009
00010 template<class Group>
00011
00013
00018 class CompressedGInvertedList : public GInvertedList<Group>
00019 {
00020
00021
00022 public:
00023
00024
00025 unsigned long byte_size;
00026
00027 Group g_factor;
00028
00029 CompressedGInvertedList<Group>()
00030 {
00031 elements = NULL;
00032
00033 insertion_file_name[0] = '\0';
00034 element_count = 0;
00035
00036 mode = MODE_CREATE_INDEX;
00037
00038
00039
00040 }
00041
00042
00043 ListTupel<Group> operator[](int index)
00044
00045
00046
00047 {
00048
00049 ListTupel<Group> lt = ListTupel<Group>();
00050
00051 if (mode!=MODE_SEARCH)
00052 return lt;
00053
00054 if (elements==NULL)
00055 readElements();
00056
00057 lt = elements[index];
00058
00059 return lt;
00060
00061 };
00062
00063 void mult(Group g)
00064 {
00065
00066
00067
00068
00069 g_factor = g;
00070
00071 }
00072
00073 unsigned long size()
00074 {
00075 return element_count;
00076 }
00077
00078 long byteSize()
00079 {
00080 return sizeof(ListTupel<Group>)*size();
00081 }
00082
00083 bool writeToFile(FILE* stream)
00084
00085
00086 {
00087
00088 if (mode!=MODE_CREATE_INDEX)
00089 return false;
00090
00091 if (element_count<=0)
00092 return true;
00093
00094 FILE* insertion_stream;
00095 insertion_stream = fopen(insertion_file_name,"rb");
00096 if (insertion_stream == NULL)
00097 return false;
00098
00099
00100 rewind(insertion_stream);
00101
00102 gsize = sizeof(ListTupel<Group>);
00103
00104 ListTupel<Group>* elements = new ListTupel<Group>[element_count];
00105
00106
00107 for (int i=0; i<element_count; i++)
00108 {
00109 fread(&(elements[i]), gsize, 1, insertion_stream);
00110 if (ferror(insertion_stream)!=0)
00111 return false;
00112 }
00113
00114 fclose(insertion_stream);
00115
00116
00117 heapsort< ListTupel<Group> >(elements,element_count);
00118
00119 int bitPos = 31;
00120 int longPos = 0;
00121
00122
00123
00124 unsigned long bitSize = 0;
00125 unsigned long maxSize;
00126 unsigned long unique_element_count = element_count;
00127 for (i=0; i<element_count; i++)
00128 {
00129 if ((elements[i]==elements[i-1]))
00130
00131 unique_element_count--;
00132 else
00133 {
00134 unsigned long isize = deltaSize(&(elements[i].i),sizeof(int));
00135 unsigned long gsize = deltaSize(&(elements[i].g),sizeof(Group));
00136 bitSize += isize;
00137 bitSize += gsize;
00138 if (isize>maxSize)
00139 maxSize = isize;
00140 if (gsize>maxSize)
00141 maxSize = isize;
00142 }
00143 }
00144
00145
00146 unsigned long byte_size = (bitSize+31)/8;
00147 void* buffer = malloc(byte_size);
00148
00149
00150 deltaEncode(&(elements[0].i),sizeof(int),buffer,longPos,bitPos);
00151 deltaEncode(&(elements[0].g),sizeof(Group),buffer,longPos,bitPos);
00152
00153 for (i=1; i<element_count; i++)
00154 {
00155 if (!(elements[i]==elements[i-1]))
00156 {
00157 deltaEncode(&(elements[i].i),sizeof(int),buffer,longPos,bitPos);
00158 deltaEncode(&(elements[i].g),sizeof(Group),buffer,longPos,bitPos);
00159 }
00160 };
00161
00162
00163
00164 fwrite(&unique_element_count,sizeof(unsigned long),1,stream);
00165
00166 unsigned long lpos = longPos+1;
00167 fwrite(&byte_size,sizeof(unsigned long),1,stream);
00168
00169 fwrite(buffer,1,byte_size,stream);
00170
00171
00172 delete[] elements;
00173 free(buffer);
00174
00175
00176 remove(insertion_file_name);
00177
00178 return true;
00179
00180 }
00181
00182 void initFileRead(FILE* stream)
00183 {
00184
00185 my_fileoffset = ftell(stream);
00186
00187
00188 fread(&element_count, sizeof(long), 1, stream);
00189
00190
00191 fread(&byte_size, sizeof(unsigned long), 1, stream);
00192
00193 mystream = stream;
00194
00195 mode = MODE_SEARCH;
00196
00197
00198 fseek(stream, byte_size, SEEK_CUR);
00199
00200 }
00201
00202 bool readElements(ListTupel<Group>* list_buffer)
00203
00204
00205 {
00206
00207 fseek(mystream, my_fileoffset + 2*sizeof(unsigned long), SEEK_SET);
00208
00209 void* buffer;
00210 buffer = malloc(byte_size);
00211
00212 long read_cnt = fread(buffer, 1, byte_size, mystream);
00213 if (read_cnt < byte_size)
00214 {
00215 return false;
00216 }
00217
00218 int longPos = 0;
00219 int bitPos = 31;
00220
00221 for (int i=0; i<element_count; i++)
00222 {
00223 if (deltaDecode(buffer, (void*)&(list_buffer[i].i), longPos, bitPos)==0)
00224 list_buffer[i].i = 0;
00225 if (deltaDecode(buffer, (void*)&(list_buffer[i].g), longPos, bitPos)==0)
00226 zeroes(&(list_buffer[i].g),sizeof(Group));
00227 list_buffer[i].g.mult(g_factor);
00228 }
00229
00230
00231 free(buffer);
00232
00233 return true;
00234
00235 }
00236
00237 bool readElements()
00238 {
00239 if (elements!=NULL)
00240 return true;
00241
00242 elements = new ListTupel<Group>[element_count];
00243 return readElements(elements);
00244 }
00245
00246 void intersect(ResultList<Group, CompressedGInvertedList<Group> >* set2)
00247 {
00248 if (elements==NULL)
00249 readElements();
00250
00251
00252 set2->readToBuffer(set2->elements);
00253
00254 ListTupel<Group>* elements2 = set2->elements;
00255
00256 int insert_position = 0;
00257
00258 for (int i=0; i < element_count; i++)
00259 {
00260
00261
00262 ListTupel<Group> current = elements[i];
00263
00264 Group g_m_2_inv = set2->g_m;
00265 g_m_2_inv.invert();
00266
00267 ListTupel<Group> l1 = elements[i];
00268
00269
00270
00271
00272
00273 int left = 0;
00274 int right = set2->size();
00275 bool found = false;
00276 int mid = 0;
00277
00278 while (left<=right && !found)
00279 {
00280
00281 mid = (left+right)/2;
00282
00283 ListTupel<Group> cmp = elements2[mid];
00284
00285
00286
00287 cmp.g.mult(g_m_2_inv);
00288
00289 if (l1.i==cmp.i && l1.g.isSimilar(cmp.g))
00290 found = true;
00291 else if (l1<cmp)
00292 left = mid+1;
00293 else
00294 right = mid-1;
00295 }
00296
00297
00298 if (found)
00299 {
00300
00301 elements[insert_position] = elements[i];
00302 insert_position++;
00303 }
00304
00305 }
00306
00307 element_count = insert_position;
00308
00309 };
00310
00311
00312 ~CompressedGInvertedList<Group>()
00313 {
00314
00315 if (elements!=NULL)
00316 delete[] elements;
00317
00318 elements = NULL;
00319 }
00320
00321 };
00322
00323 }