001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.descriptors.metrics;
012    
013    import com.chemaxon.apidiscovery.annotations.Description;
014    import com.chemaxon.common.annotations.PublicAPI;
015    import com.google.common.annotations.Beta;
016    
017    /**
018     * Simple metrics suitable for finite length binary vectors.
019     *
020     * <p>Please note that this enum is marked with @Beta annotation, so it can be subject of incompatible changes or
021     * removal in later releases.</p>
022     *
023     * @author Gabor Imre
024     */
025    
026    @Beta
027    @PublicAPI
028    public enum BinaryMetrics implements BinaryVectorComparator {
029    
030        /**
031         * Manhattan distance.
032         *
033         * <p>
034         * Binary positions with different values.
035         * </p>
036         *
037         * @see <a href="http://en.wikipedia.org/wiki/Taxicab_geometry">http://en.wikipedia.org/wiki/Taxicab_geometry</a>
038         */
039        @Description(
040                shortName = "Manhattan",
041                name = "Manhattan distance",
042                description = "Dissimilarity value is calculated as the number of bit positions containing differing"
043                    + " values. This measure is also known as \"Taxicab geometry\"")
044        MANHATTAN {
045            public boolean isDissimilarityZeroIFFEquals() {
046                return true;
047            }
048    
049            public boolean isNonNegative() {
050                return true;
051            }
052    
053            public boolean isSymmetric() {
054                return true;
055            }
056    
057            public boolean isTriangeInequalityHolds() {
058                return true;
059            }
060    
061            public boolean isMetricSpace() {
062                return true;
063            }
064    
065            @Override
066            public double getLowerBoundForBinaryVectors(final int size) {
067                return 0;
068            }
069    
070            @Override
071            public double getUpperBoundForBinaryVectors(final int size) {
072                return size;
073            }
074    
075            @Override
076            public double calculateBinaryVectorDissimilarity(final int size, final int targetOnly,
077                    final int queryOnly, final int common) {
078                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
079                return targetOnly + queryOnly;
080            }
081    
082            @Override
083            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
084                    int common) {
085                return getUpperBoundForBinaryVectors(bitCount)
086                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
087            }
088        },
089    
090        /**
091         * Normalized Manhattan metric.
092         *
093         * <p>The similarity/dissimilarity values range of the {@link BinaryMetrics#MANHATTAN} normalized to the unit
094         * interval (0.0 - 1.0, including bounds).</p>
095         */
096        @Description(
097                shortName = "Manhattan (norm.)",
098                name = "Manhattan distance (normalized)",
099                description = "Dissimilarity value is calculated as the number of bit positions containing differing"
100                    + " values. This measure is also known as \"Taxicab geometry\". Similarity/dissimilarity value range"
101                    + " is normalized to the unit interval (0.0 -  1.0 including bounds).")
102        MANHATTAN_NORMALIZED {
103            public boolean isDissimilarityZeroIFFEquals() {
104                return true;
105            }
106    
107            public boolean isNonNegative() {
108                return true;
109            }
110    
111            public boolean isSymmetric() {
112                return true;
113            }
114    
115            public boolean isTriangeInequalityHolds() {
116                return true;
117            }
118    
119            public boolean isMetricSpace() {
120                return true;
121            }
122    
123            @Override
124            public double getLowerBoundForBinaryVectors(int size) {
125                return 0;
126            }
127    
128            @Override
129            public double getUpperBoundForBinaryVectors(int size) {
130                return 1.0;
131            }
132    
133            @Override
134            public double calculateBinaryVectorDissimilarity(int bitCount, int targetOnly,
135                    int queryOnly, int common) {
136                BinaryVectorComparator.Util.checkBitCounts(bitCount, targetOnly, queryOnly, common);
137                return (double) (targetOnly + queryOnly) / bitCount;
138            }
139    
140            @Override
141            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
142                    int common) {
143                return getUpperBoundForBinaryVectors(bitCount)
144                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
145            }
146        },
147    
148        /**
149         * Euclidean distance.
150         *
151         * <p>
152         * Square root of the sums of the difference squares.
153         * <p>
154         *
155         * @see <a href="http://en.wikipedia.org/wiki/Euclidean_distance">http://en.wikipedia.org/wiki/Euclidean_distance</a>
156         */
157        @Description(
158                shortName = "Euclidean",
159                name = "Binary Euclidean distance",
160                description = "Dissimilarity value is calculated as the square root of the number of bit positions"
161                + " containing differing values.")
162        BINARY_EUCLIDEAN {
163            public boolean isDissimilarityZeroIFFEquals() {
164                return true;
165            }
166    
167            public boolean isNonNegative() {
168                return true;
169            }
170    
171            public boolean isSymmetric() {
172                return true;
173            }
174    
175            public boolean isTriangeInequalityHolds() {
176                return true;
177            }
178    
179            public boolean isMetricSpace() {
180                return true;
181            }
182    
183            @Override
184            public double getLowerBoundForBinaryVectors(int size) {
185                return 0;
186            }
187    
188            @Override
189            public double getUpperBoundForBinaryVectors(int size) {
190                return Math.sqrt(size);
191            }
192    
193            @Override
194            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
195                    int common) {
196                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
197                return Math.sqrt(targetOnly + queryOnly);
198            }
199    
200            @Override
201            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
202                    int common) {
203                return getUpperBoundForBinaryVectors(bitCount)
204                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
205            }
206        },
207    
208        /**
209         * Normalized Euclidean distance.
210         *
211         * <p>The similarity/dissimilarity values range of the {@link BinaryMetrics#BINARY_EUCLIDEAN} normalized to the unit
212         * interval (0.0 - 1.0, including bounds).</p>
213         */
214        @Description(
215                shortName = "Euclidean (norm.)",
216                name = "Binary Euclidean distance (normalized)",
217                description = "Dissimilarity value is calculated as the square root of the number of bit positions"
218                    + " containing differing values. Similarity/dissimilarity value range is normalized to the"
219                    + " unit interval (0.0 -  1.0 including bounds).")
220        BINARY_EUCLIDEAN_NORMALIZED {
221            public boolean isDissimilarityZeroIFFEquals() {
222                return true;
223            }
224    
225            public boolean isNonNegative() {
226                return true;
227            }
228    
229            public boolean isSymmetric() {
230                return true;
231            }
232    
233            public boolean isTriangeInequalityHolds() {
234                return true;
235            }
236    
237            public boolean isMetricSpace() {
238                return true;
239            }
240    
241            @Override
242            public double getLowerBoundForBinaryVectors(int size) {
243                return 0;
244            }
245    
246            @Override
247            public double getUpperBoundForBinaryVectors(int size) {
248                return 1.0;
249            }
250    
251            @Override
252            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
253                    int common) {
254                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
255                return Math.sqrt((double) (targetOnly + queryOnly) / size);
256            }
257    
258            @Override
259            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
260                    int common) {
261                return getUpperBoundForBinaryVectors(bitCount)
262                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
263            }
264        },
265    
266        /**
267         * Commonly set bits.
268         *
269         * <p>
270         * The count of bit positions which contains set bits is considered as the similarity value. Please note that this
271         * comparison yields non-zero and non-maximal dissimilarity for equal, non-empty descriptors.
272         * </p>
273         */
274        @Description(
275                shortName = "Commonpart",
276                name = "Binary common part distance",
277                description = "Similarity value is calculated as the bit positions containing set values in both compared"
278                    + " descriptor. Dissimilarity value is calculated by substracting this common bit counts from the"
279                    + " fingerprint length.")
280        BINARY_COMMONPART {
281            public boolean isDissimilarityZeroIFFEquals() {
282                return false;
283            }
284    
285            public boolean isNonNegative() {
286                return true;
287            }
288    
289            public boolean isSymmetric() {
290                return true;
291            }
292    
293            public boolean isTriangeInequalityHolds() {
294                return false;
295            }
296    
297            public boolean isMetricSpace() {
298                return false;
299            }
300    
301            @Override
302            public double getLowerBoundForBinaryVectors(int size) {
303                return 0;
304            }
305    
306            @Override
307            public double getUpperBoundForBinaryVectors(int size) {
308                return size;
309            }
310    
311            @Override
312            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
313                    int common) {
314                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
315                return size - common;
316            }
317    
318            @Override
319            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
320                    int common) {
321                return getUpperBoundForBinaryVectors(bitCount)
322                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
323            }
324        },
325    
326        /**
327         * Common set bit ratio.
328         *
329         * <p>The similarity/dissimilarity values range of the {@link BinaryMetrics#BINARY_COMMONPART} normalized to the unit
330         * interval (0.0 - 1.0, including bounds).</p>
331         */
332        @Description(
333                shortName = "Commonpart (norm.)",
334                name = "Binary common part distance (normalized)",
335                description = "Similarity value is calculated as the bit positions containing set values in both compared"
336                    + " descriptor. Dissimilarity value is calculated by substracting this common bit counts from the"
337                    + " fingerprint length. Similarity/dissimilarity value range is normalized to the unit interval"
338                    + " (0.0 -  1.0 including bounds) by dividing the values with the fingerprint length.")
339        BINARY_COMMONPART_NORMALIZED {
340            public boolean isDissimilarityZeroIFFEquals() {
341                return false;
342            }
343    
344            public boolean isNonNegative() {
345                return true;
346            }
347    
348            public boolean isSymmetric() {
349                return true;
350            }
351    
352            public boolean isTriangeInequalityHolds() {
353                return false;
354            }
355    
356            public boolean isMetricSpace() {
357                return false;
358            }
359    
360            @Override
361            public double getLowerBoundForBinaryVectors(int size) {
362                return 0;
363            }
364    
365            @Override
366            public double getUpperBoundForBinaryVectors(int size) {
367                return 1.0;
368            }
369    
370            @Override
371            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
372                    int common) {
373                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
374                return 1.0 - (double) common / size;
375            }
376    
377            @Override
378            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
379                    int common) {
380                return getUpperBoundForBinaryVectors(bitCount)
381                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
382            }
383        },
384    
385        /**
386         * Dice's similarity measure.
387         *
388         * <p>Similarity coefficient is twice the common part divided by the sum of components.</p>
389         *
390         * <p>When comparing all-zero binary fingerprint we return minimal similarity/maximal dissimilarity.</p>
391         *
392         * @see <a href="http://en.wikipedia.org/wiki/Dice%27s_coefficient">http://en.wikipedia.org/wiki/Dice%27s_coefficient</a>
393         */
394        @Description(
395                shortName = "Dice",
396                name = "Dice's similarity measure",
397                description = "Similarity coefficient is twice the size of common part divided by the sum of components."
398                    + " When comparing all-zero binary fingerprint we return minimal similarity (maximal dissimilarity).")
399        BINARY_DICE {
400            public boolean isDissimilarityZeroIFFEquals() {
401                return true;
402            }
403    
404            public boolean isNonNegative() {
405                return true;
406            }
407    
408            public boolean isSymmetric() {
409                return true;
410            }
411    
412            public boolean isTriangeInequalityHolds() {
413                return false;
414            }
415    
416            public boolean isMetricSpace() {
417                return false;
418            }
419    
420            @Override
421            public double getLowerBoundForBinaryVectors(int size) {
422                return 0;
423            }
424    
425            @Override
426            public double getUpperBoundForBinaryVectors(int size) {
427                return 1.0;
428            }
429    
430            @Override
431            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
432                    int common) {
433                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
434                if (targetOnly == 0 && queryOnly == 0 && common == 0) {
435                    return 1.0;
436                } else {
437                    return 1.0 - 2.0 * (double) common / (targetOnly + queryOnly + 2 * common);
438                }
439            }
440    
441            @Override
442            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
443                    int common) {
444                return getUpperBoundForBinaryVectors(bitCount)
445                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
446            }
447        },
448    
449        /**
450         * Tanimoto coefficient.
451         *
452         * <p>When comparing all-zero binary fingerprint we return minimal similarity/maximal dissimilarity in the current
453         * implementation.</p>
454         *
455         * <p>Please note that the triangle inequality holds for the Tanimoto distance; see
456         * Lipkus, AlanH.: A proof of the triangle inequality for the Tanimoto distance (Journal of Mathematical Chemistry,
457         * 1999, volume 26, pages 263-265)</p>
458         *
459         * @see <a href="http://en.wikipedia.org/wiki/Jaccard_index">http://en.wikipedia.org/wiki/Jaccard_index</a>
460         */
461        @Description(
462                shortName = "Tanimoto",
463                name = "Tanimoto coefficient",
464                description = "Tanimoto similarity coefficient (often referred as Jaccard index) is calculated by"
465                    + " dividing the size of intersection (number of bit positions set in both descriptor) with the size"
466                    + " of the union (number of bit positions set in either descriptor)."
467                    + " When comparing all-zero binary fingerprint we return minimal similarity (maximal dissimilarity).")
468        BINARY_TANIMOTO {
469            public boolean isDissimilarityZeroIFFEquals() {
470                return true;
471            }
472    
473            public boolean isNonNegative() {
474                return true;
475            }
476    
477            public boolean isSymmetric() {
478                return true;
479            }
480    
481            public boolean isTriangeInequalityHolds() {
482                return true;
483            }
484    
485            public boolean isMetricSpace() {
486                return true;
487            }
488    
489            @Override
490            public double getLowerBoundForBinaryVectors(int size) {
491                return 0;
492            }
493    
494            @Override
495            public double getUpperBoundForBinaryVectors(int size) {
496                return 1.0;
497            }
498    
499            @Override
500            public double calculateBinaryVectorDissimilarity(int size, int targetOnly, int queryOnly,
501                    int common) {
502                BinaryVectorComparator.Util.checkBitCounts(size, targetOnly, queryOnly, common);
503                if (targetOnly == 0 && queryOnly == 0 && common == 0) {
504                    return 1.0;
505                } else {
506                    return 1.0 - (double) common / (targetOnly + queryOnly + common);
507                }
508            }
509    
510            @Override
511            public double calculateBinaryVectorSimilarity(int bitCount, int targetOnly, int queryOnly,
512                    int common) {
513                return getUpperBoundForBinaryVectors(bitCount)
514                        - calculateBinaryVectorDissimilarity(bitCount, targetOnly, queryOnly, common);
515            }
516                };
517    
518    }