package net.obsearch.index.ghs; import hep.aida.bin.StaticBin1D; import it.unimi.dsi.io.InputBitStream; import java.io.FileWriter; import java.io.IOException; import java.lang.reflect.Array; import java.nio.ByteBuffer; import java.nio.LongBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.List; import java.util.Random; import java.util.logging.Logger; import cern.colt.bitvector.BitVector; import net.obsearch.AbstractOBResult; import net.obsearch.OB; import net.obsearch.OperationStatus; import net.obsearch.Status; import net.obsearch.cache.OBCacheByteArray; import net.obsearch.cache.OBCacheHandlerByteArray; import net.obsearch.constants.ByteConstants; import net.obsearch.constants.OBSearchProperties; import net.obsearch.exception.AlreadyFrozenException; import net.obsearch.exception.IllegalIdException; import net.obsearch.exception.OBException; import net.obsearch.exception.OBStorageException; import net.obsearch.exception.OutOfRangeException; import net.obsearch.exception.PivotsUnavailableException; import net.obsearch.filter.FilterNonEquals; import net.obsearch.index.bucket.BucketContainer; import net.obsearch.index.bucket.BucketObject; import net.obsearch.index.sorter.AbstractBucketSorter; import net.obsearch.pivots.IncrementalPairPivotSelector; import net.obsearch.pivots.IncrementalPivotSelector; import net.obsearch.pivots.PivotPairResult; import net.obsearch.pivots.PivotResult; import net.obsearch.query.AbstractOBQuery; import net.obsearch.result.OBResultInt; import net.obsearch.result.OBResultInvertedByte; import net.obsearch.storage.CloseIterator; import net.obsearch.storage.OBStorageConfig; import net.obsearch.storage.OBStoreFactory; import net.obsearch.storage.OBStoreLong; import net.obsearch.storage.TupleBytes; import net.obsearch.storage.TupleLong; import net.obsearch.storage.OBStorageConfig.IndexType; import net.obsearch.utils.Pair; import net.obsearch.utils.bytes.ByteConversion; /* OBSearch: a distributed similarity search engine This project is to similarity search what 'bit-torrent' is to downloads. Copyright (C) 2009 Arnoldo Jose Muller Molina This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /** * AbstractSketch64 encapsulates functionality common to a Sketch. The maximum * number of bits has been artificially set to 64 bits to optimize performance. * If you use less than 64 bits the keys used for the sketch only use the amount * of bits the user specifies. Nevertheless the handling of priority queues and * the matching itself occurs on longs. It should affect considerably AMD64 * architectures. If you are using a 32 bits processor or operating system, then * this implementation will not be as efficient. * * @author Arnoldo Jose Muller Molina */ public abstract class AbstractSketch64, Q, BC extends BucketContainer> extends AbstractBucketSorter { static final transient Logger logger = Logger .getLogger(AbstractSketch64.class.getName()); /** * Number of bits. */ protected int m; /** * Only two values per dimension (1, 0) */ private static final int HEIGHT = 2; /** * pivot grid. */ protected O[][] pivotGrid; /** * Distortion stats */ protected int[][] distortionStats; /** * Pivot selector for the masks. */ protected IncrementalPairPivotSelector maskPivotSelector; public AbstractSketch64(){ super(); } public AbstractSketch64(Class type, IncrementalPairPivotSelector pivotSelector, int m, int bucketPivotCount) throws OBStorageException, OBException { super(type, null, 0, bucketPivotCount); this.m = m; this.maskPivotSelector = pivotSelector; pivotGrid = (O[][]) Array.newInstance(type, m, HEIGHT); distortionStats = new int[m][HEIGHT]; } protected byte[] compactRepresentationToBytes(CBitVector cp){ return cp.store(); } protected CBitVector bytesToCompactRepresentation(byte[] data){ return new CBitVector(data, m); } public void freeze() throws AlreadyFrozenException, IllegalIdException, OutOfRangeException, IllegalAccessException, InstantiationException, OBException, PivotsUnavailableException, IOException { super.freeze(); int i = 0; /* * while (i < m) { int cx = 0; PivotResult r = * super.selectPivots(HEIGHT, this.maskPivotSelector); while (cx < * HEIGHT) { pivotGrid[i][cx] = getObject(r.getPivotIds()[cx]); cx++; } * i++; } */ PivotPairResult pivots = maskPivotSelector.generatePivotsPair(m, null, this); // fill the pivots in the grid. i = 0; for (Pair p : pivots.getPairs()) { pivotGrid[i][0] = p.getA(); pivotGrid[i][1] = p.getB(); i++; } logger.info("Moving objects to the buckets..."); freezeDefault(); this.bucketCache.clearAll(); // test the mask thing. logger.info("Loading masks..."); loadMasks(); logger.info("Calculating estimators..."); calculateEstimators(); logger.info("Index stats..."); bucketStats(); sketchStats(); } public void debugDist() throws IllegalIdException, OBStorageException, IllegalAccessException, InstantiationException, OBException, IOException{ int i = 0; Random r = new Random(); FileWriter f = new FileWriter("my" + this.getPivotCount() + ".csv"); StaticBin1D[] stats = new StaticBin1D[m + 1]; while(i < stats.length){ stats[i] = new StaticBin1D(); i++; } i = 0; while(i < databaseSize()){ O a = getObject(r.nextInt((int)databaseSize())); O b = getObject(r.nextInt((int)databaseSize())); SketchProjection sa = getProjection(getBucket(a)); SketchProjection sb = getProjection(getBucket(b)); double d = distance(a,b); int s = sa.distance(sb.getSketch()).getDistance(); stats[s].add(d); i++; } i = 0; f.write("#ds distance avg error max dmin\n"); for(StaticBin1D s : stats){ if(s.size() > 0){ f.write(i + "\t" + s.mean() + "\t" + + s.standardDeviation() + "\t" + s.min() + "\t" + s.max() + "\n"); } i++; } f.close(); } @Override protected void updateDistance(SketchProjection query, CBitVector proj, FixedPriorityQueue queue) { int distance = query.hamming(proj); if(! queue.isFull() || distance < queue.peek().getDistance() ){ if(queue.isFull()){ SketchProjection p = queue.poll(); p.setSketch(proj); p.setDistance(distance); queue.offer(p); }else{ queue.add(new SketchProjection(null, proj, distance, null)); } } } private void sketchStats() throws OBStorageException { if (HEIGHT == 2) { StaticBin1D s = new StaticBin1D(); int i = 0; while (i < distortionStats.length) { s.add(Math.abs(this.distortionStats[i][0] - this.distortionStats[i][1])); i++; } logger.info("Distortion:"); logger.info(s.toString()); logger.info("Distortion:" + s.mean() / databaseSize()); } } }