001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.clustering.common;
012    
013    import com.chemaxon.common.annotations.PublicAPI;
014    import com.google.common.annotations.Beta;
015    import java.util.ArrayList;
016    import java.util.Arrays;
017    import java.util.Collection;
018    import java.util.HashSet;
019    import java.util.Iterator;
020    import java.util.LinkedList;
021    import java.util.List;
022    import java.util.Set;
023    
024    /**
025     * An ID based cluster level assigner builder.
026     *
027     * <p>Please note that the following must be satisfied:
028     * <ul>
029     * <li><b>root aligned</b> clusterings:
030     * <ul><li>0 level must have assigned to all root clusters</li>
031     * <li>Child clusters must have assigned no lower level than their respective parents</li>
032     * <li>Leaves must have assigned no lower level than their parents</li>
033     * </ul></li>
034     * <li><b>leaves aligned</b> clusterings:
035     * <ul><li>0 level must have assigned to all leaves. </li>
036     * <li>Child clusters must have assigned no greater level than their respective parents</li>
037     * <li>It is ok to not assign explicitly zero levels for leaves</li>
038     * </ul></li>
039     * </ul>
040     * </p>
041     *
042     * <p>Checking of the above conditions are not enforced during setup.</p>
043     *
044     * <p>Please note that this class is marked with {@link Beta} annotation, so it can be subject of incompatible changes
045     * or removal in later releases.</p>
046     *
047     * @author Gabor Imre
048     */
049    @Beta
050    @PublicAPI
051    public class IDBasedAssignerBuilder {
052    
053        /**
054         * Set cluster levels.
055         */
056        private final List<Double> clusterLevels = new ArrayList<Double>();
057    
058        /**
059         * Set leaf levels.
060         */
061        private final List<Double> leafLevels = new ArrayList<Double>();
062    
063        /**
064         * Ensure that an index is present in a list by filling it with <code>null</code>s.
065         *
066         * @param list  A list
067         * @param index An index needed to be present
068         */
069        private static void ensureIndex(List<?> list, int index) {
070            while (list.size() < index + 1) {
071                list.add(null);
072            }
073        }
074    
075        /**
076         * Set the level of a specific cluster.
077         *
078         * <p>If level information for a specific cluster is already set it will be overwritten. Note that <code>0</code>
079         * level must be explicitly set for roots when clustering is root aligned.</p>
080         *
081         * @param clusterID     ID of cluster
082         * @param level         Level to assign
083         */
084        public void setClusterLevel(int clusterID, double level) {
085            if (level < 0) {
086                throw new IllegalArgumentException("Level assigend to cluster id " + clusterID + "must be non negative: "
087                        + level);
088            }
089            ensureIndex(this.clusterLevels, clusterID);
090            this.clusterLevels.set(clusterID, level);
091        }
092    
093        /**
094         * Get the current associated level for a cluster.
095         *
096         * @param clusterID ID of a cluster
097         * @return          Associated level for the cluster
098         * @throws IllegalArgumentException when no level set for the given cluster
099         */
100        public double getClusterLevel(int clusterID) {
101            if (clusterID < 0) {
102                throw new IllegalArgumentException("Cluster ID must be non negative " + clusterID);
103            }
104            if (this.clusterLevels.size() <= clusterID || this.clusterLevels.get(clusterID) == null) {
105                throw new IllegalArgumentException("Level for cluster " + clusterID + " is not set.");
106            }
107            return this.clusterLevels.get(clusterID);
108        }
109    
110        /**
111         * Get the current associated level for a leaf.
112         *
113         * @param leafID    ID of a leaf
114         * @return          Associated level of the leaf
115         * @throws IllegalArgumentException when no level set for the given leaf
116         */
117        public double getLeafLevel(int leafID) {
118            if (leafID < 0) {
119                throw new IllegalArgumentException("Leaf ID must be non negative " + leafID);
120            }
121            if (this.leafLevels.size() <= leafID || this.leafLevels.get(leafID) == null) {
122                throw new IllegalArgumentException("Level for leaf " + leafID + " is not set.");
123            }
124            return this.leafLevels.get(leafID);
125        }
126    
127    
128        /**
129         * Set the level of a specific leaf node.
130         *
131         * <p>If level information for a specific leaf is already set it will be overwritten. Note that explicitly setting
132         * <code>0</code> level of leaves for leaf aligned clusterings is not needed. Mixed setting is also possible.</p>
133         *
134         * @param leafID        ID of the leaf
135         * @param level         Level to assign
136         */
137        public void setLeafLevel(int leafID, double level) {
138            if (level < 0) {
139                throw new IllegalArgumentException("Level assigend to leaf id " + leafID + "must be non negative: "
140                        + level);
141            }
142            ensureIndex(this.leafLevels, leafID);
143            this.leafLevels.set(leafID, level);
144        }
145    
146    
147    
148        /**
149         * Check level assignment consistency for a cluster.
150         *
151         * @param c                      A Cluster to check
152         * @param alignment              Clustering alignment
153         *
154         * @throws IllegalStateException if the alignment is found to be inconsistent regarding given clustering
155         */
156        private void checkClusterConsistency(IDBasedHierarchicCluster c, HierarchicClustering.Alignment alignment) {
157            // current cluster id
158            final int cid = c.getClusterID();
159    
160            if (this.clusterLevels.size() <= cid || this.clusterLevels.get(cid) == null) {
161                throw new IllegalStateException("Level for cluster ID " + cid + " is not specified.");
162            }
163    
164            // Cluster level
165            final double cl = this.clusterLevels.get(cid);
166    
167            if (c.parent().isPresent()) {
168                final int pid = c.parent().get().getClusterID();
169                if (this.clusterLevels.size() <= pid || this.clusterLevels.get(pid) == null) {
170                    throw new IllegalStateException("Level for cluster ID " + pid + " (parent of cluster ID " + cid
171                            + ") is not specified.");
172                }
173                final double pl = this.clusterLevels.get(pid);
174                if (alignment == HierarchicClustering.Alignment.ROOT_ALIGNED) {
175                    if (cl < pl) {
176                        throw new IllegalStateException("In root aligned clustering level of parent cluster " // NOOMD
177                                + pl + " (id " + pid + ") should be no greater than the level of child cluster " // NOPMD
178                                + cl + " (id " + cid); // NOPMD
179                    }
180                } else {
181                    if (cl > pl) {
182                        throw new IllegalStateException("In leaves aligned clustering level of parent cluster " // NOOMD
183                                + pl + " (id " + pid + ") should be no less than the level of child cluster " // NOOMD
184                                + cl + " (id " + cid); // NOOMD
185                    }
186                }
187    
188            } else if (alignment == HierarchicClustering.Alignment.ROOT_ALIGNED) {
189                if (this.clusterLevels.get(cid) != 0.0) { // NOPMD
190                    throw new IllegalStateException("In root aligned clusterings Level for root cluster ID " + cid
191                            + " should be zero instead of " + cl);
192                }
193            }
194    
195            //  Check leaves
196            if (alignment == HierarchicClustering.Alignment.LEAF_ALIGNED) {
197                for (int i = 0; i < this.leafLevels.size(); i++) {
198                    if (this.leafLevels.get(i) != null && this.leafLevels.get(i) != 0.0) {
199                        throw new IllegalStateException("In lead aligned clusterings level for lead ID " + i
200                                + " should be zero instead of " + this.leafLevels.get(i));
201                    }
202                }
203            } else {
204                for (int li : c.leaves()) {
205                    if (this.leafLevels.size() <= li || this.leafLevels.get(li) == null) {
206                        throw new IllegalStateException("Level of leaf ID " + li + " (of cluster ID " + cid
207                                + " is not specified");
208                    }
209                    final double ll = this.leafLevels.get(li);
210                    if (cl > ll) {
211                        throw new IllegalStateException("Level of lead (ID " + li + ") " + ll
212                                + " should be greater than level of respective parent cluster (ID " + cid
213                                + ") " + cl);
214                    }
215                }
216            }
217        }
218    
219        /**
220         * Check level assignment consistency.
221         *
222         * @param hierarcy               Clustering hierarchy
223         * @throws IllegalStateException if the alignment is found to be inconsistent regarding given clustering
224         */
225        // TODO: cover corner cases with junit tests
226        private void checkConsistency(IDBasedHierarchy hierarcy) {
227    
228            final ArrayList<IDBasedHierarchicCluster> queue = new ArrayList<IDBasedHierarchicCluster>();
229    
230            queue.addAll(hierarcy.roots());
231    
232            while (!queue.isEmpty()) {
233                final IDBasedHierarchicCluster c = queue.remove(queue.size() - 1);
234    
235                checkClusterConsistency(c, hierarcy.preferredAlignment());
236    
237                // Add children to the queu
238                queue.addAll(c.clusters());
239            }
240        }
241    
242    
243        /**
244         * Build the assigner.
245         *
246         * <p>Note that this method usually invoked by
247         * {@link IDBasedHierarchicClusterBuidler#build(com.chemaxon.clustering.common.IDBasedAssignerBuilder)}.</p>
248         *
249         * @param hierarchy Associated clustering hierarchy
250         * @return          Immutable assigner for the given hierarchy
251         */
252        IDBasedAssigner build(IDBasedHierarchy hierarchy) {
253            return new IDBAssignerImpl(hierarchy, this);
254        }
255    
256        /**
257         * Traverse root aligned clusters to fill an assigner with path length infos.
258         *
259         * @param clusters  Clusters to traverse
260         * @param builder   Builder to fill
261         * @param level     Level of given clusters
262         */
263        private static void traverseRootAlignedClusters(
264                Collection<? extends IDBasedHierarchicCluster> clusters,
265                IDBasedAssignerBuilder builder,
266                int level) {
267    
268            for (IDBasedHierarchicCluster c : clusters) {
269                builder.setClusterLevel(c.getClusterID(), level);
270                for (Integer l : c.leaves()) {
271                    builder.setLeafLevel(l, level + 1.0);
272                }
273                traverseRootAlignedClusters(c.clusters(), builder, level + 1);
274            }
275        }
276    
277        /**
278         * Traverse leaf aligned clusters, set cluster levels accordingly.
279         *
280         * @param clusters  Clusters to traverse
281         * @param builder   Builder to update
282         * @return          Distance of the farthest leaf among the clusters (level of parent-1)
283         */
284        private static int traverseLeafAlignedClusters(
285                Collection<? extends IDBasedHierarchicCluster> clusters,
286                IDBasedAssignerBuilder builder) {
287    
288            if (clusters.isEmpty()) {
289                throw new IllegalArgumentException("No clusters to traverse");
290            }
291    
292            int ret = 0;
293    
294            for (IDBasedHierarchicCluster c : clusters) {
295                int d = 0;
296    
297                if (!c.clusters().isEmpty()) {
298                    d = 1 + traverseLeafAlignedClusters(c.clusters(), builder);
299                }
300                if (!c.leaves().isEmpty() && d == 0) {
301                    d = 1;
302                }
303                builder.setClusterLevel(c.getClusterID(), d);
304                if (d > ret) {
305                    ret = d;
306                }
307            }
308            return ret;
309    
310        }
311    
312        /**
313         * Utility method to build a path length assigner based on a hierarchy.
314         *
315         * @param hierarchy Underlying hierarchy
316         * @return          Built unit path assigner
317         */
318        // TODO: test
319        static IDBasedAssigner createUnitPathAssigner(IDBasedHierarchy hierarchy) {
320            final IDBasedAssignerBuilder b = new IDBasedAssignerBuilder();
321    
322            switch (hierarchy.preferredAlignment()) {
323    
324                case LEAF_ALIGNED:
325                    traverseLeafAlignedClusters(hierarchy.roots(), b);
326                    break;
327    
328                case ROOT_ALIGNED:
329                    traverseRootAlignedClusters(hierarchy.roots(), b, 0);
330                    break;
331    
332                default:
333                    throw new AssertionError();
334    
335            }
336    
337            return b.build(hierarchy);
338        }
339    
340    
341        /**
342         * Implementation for level assigner.
343         */
344        // TODO: it is not nice to use numeric constants for absence indication. Refactor to use BitSet.
345        static class IDBAssignerImpl implements IDBasedAssigner {
346    
347            /**
348             * Cluster levels.
349             *
350             * <p>Levels associated for cluster IDs.</p>
351             */
352            private final double [] cl;
353    
354            /**
355             * Leaf levels.
356             *
357             * <p>Use null for leaf aligned clusterings, for root aligned clusterings this is not null.</p>
358             */
359            private final double [] ll;
360    
361            /**
362             * Max level represented.
363             */
364            private final double maxLevel;
365    
366            /**
367             * The represented clustering hierarchy.
368             */
369            private final IDBasedHierarchy hierarchy;
370    
371            /**
372             * Distinct hierarchy levels associated to clusters, in increasing order.
373             */
374            // TODO: lazy init
375            private final double [] distinctLevels;
376    
377    
378            /**
379             * Partition counts associated to distinct levels.
380             */
381            // TODO: lazy, more efficient init
382            private final int [] partitionClusterCounts;
383    
384            /**
385             * Construct immutable assigner/
386             *
387             * @param hierarchy Associated clustering hierarchy
388             * @param builder   Builder with level values
389             */
390            IDBAssignerImpl(IDBasedHierarchy hierarchy, IDBasedAssignerBuilder builder) {
391                builder.checkConsistency(hierarchy);
392                this.hierarchy = hierarchy;
393                double m = 0;
394                switch (hierarchy.preferredAlignment()) {
395                    case LEAF_ALIGNED:
396                        this.ll = null;
397                        break;
398                    case ROOT_ALIGNED:
399                        this.ll = toSparseArray(builder.leafLevels);
400                        m = max(m, this.ll);
401                        break;
402                    default:
403                        throw new AssertionError();
404                }
405                this.cl = toArray(builder.clusterLevels, hierarchy.getMaxClusterID() + 1);
406                this.maxLevel = max(m, this.cl);
407    
408                // Compose distinct levels array ---------------------------------------------------------------------------
409                // Collect distinct values
410                final Set<Double> l = new HashSet<Double>();
411                // l.add(0.0);
412    
413                // Add all elements from cluster levels
414                for (double d : this.cl) {
415                    l.add(d);
416                }
417    
418                // Allocate array, fill then sort
419                this.distinctLevels = new double[l.size()];
420                final Iterator<Double> it = l.iterator();
421                for (int i = 0; i < l.size(); i++) {
422                    this.distinctLevels[i] = it.next();
423                }
424                if (it.hasNext()) {
425                    throw new IllegalStateException();
426                }
427                Arrays.sort(this.distinctLevels);
428    
429                // Fill partition cluster counts ---------------------------------------------------------------------------
430                this.partitionClusterCounts = new int[this.distinctLevels.length];
431    
432                //Tracerse all clusters
433                final LinkedList<IDBasedHierarchicCluster> queue = new LinkedList<IDBasedHierarchicCluster>();
434                queue.addAll(this.hierarchy.roots());
435                while (!queue.isEmpty()) {
436                    // Cluster traversed
437                    final IDBasedHierarchicCluster c = queue.removeFirst();
438                    // Level associated to this cluster
439                    final double cl = this.cl[c.getClusterID()];
440    
441                    // stopping level associated to the parent
442                    final double pl;
443                    if (c.parent().isPresent()) {
444                        // Stop at parent level
445                        pl = this.cl[c.parent().get().getClusterID()];
446                    } else {
447                        // Set a level which wont result in a stop
448                        if (hierarchy.preferredAlignment() == HierarchicClustering.Alignment.LEAF_ALIGNED) {
449                            pl = this.distinctLevels[this.distinctLevels.length - 1] + 1;
450                        } else {
451                            pl = -1;
452                        }
453                    }
454    
455    
456                    // The contribution of this cluster to the partitioning:
457                    //
458                    // At all levels toward leaves:
459                    //
460                    //     Immediate leaves will be split into singleton clusters. Non-immediate leaf members contribution
461                    //     will be consdered at other clusters
462                    //
463                    //     Contribution: [leaves] clusters, with size 1
464                    //
465                    // At the current level and levels up to (not included) the parent levels (if this is a root then up to
466                    // all levels):
467                    //
468                    //     This cluster, including all the represented members, is collapsed into one cluster. At the parent
469                    //     level (for non roots) the parent cluster will represent this clusters members. Root clusters will
470                    //     be collapsed into one cluster all levels above.
471                    //
472                    //     When the parent cluster has the exactly same level this cluster wont have any contribution.
473                    //
474                    //     Contribution: 1 cluster, with size [members]
475    
476                    final int ci = Arrays.binarySearch(this.distinctLevels, cl);
477    
478                    if (ci < 0) {
479                        throw new IllegalStateException();
480                    }
481    
482                    // Traversal, direction is dependent on alignment
483                    if (hierarchy.preferredAlignment() == HierarchicClustering.Alignment.LEAF_ALIGNED) {
484                        // Leaf aligned
485                        // Go towards leaves: toward decreasing levels
486                        for (int i = ci - 1; i >= 0; i--) {
487                            this.partitionClusterCounts[i] += c.leaves().size();
488                        }
489                        // Go toward parents level (excluding); toward increasing levels
490                        for (int i = ci; i < this.distinctLevels.length && this.distinctLevels[i] < pl; i++) {
491                            this.partitionClusterCounts[i] += 1;
492                        }
493                    } else {
494                        // root aligned
495                        // Go toward leaves, toward increasing levels
496                        for (int i = ci + 1; i < this.distinctLevels.length; i++) {
497                            this.partitionClusterCounts[i] += c.leaves().size();
498                        }
499                        // Go toward parents (excluded), toward decreasing
500                        for (int i = ci; i >= 0 && this.distinctLevels[i] > pl; i--) {
501                            this.partitionClusterCounts[i] += 1;
502                        }
503                    }
504    
505                    // enque children
506                    queue.addAll(c.clusters());
507                }
508    
509    
510            }
511    
512            @Override
513            public double clusterLevel(IDBasedHierarchicCluster cluster) {
514                return this.cl[cluster.getClusterID()];
515            }
516    
517            @Override
518            public double leafLevel(Integer leaf) {
519                if (leaf < 0) {
520                    throw new IllegalArgumentException("Only non negative leaf ids allowed " + leaf);
521                }
522                if (this.ll == null) {
523                    // leaf aligned clustering
524                    return 0;
525                }
526                if (leaf >= this.ll.length || this.ll[leaf] == -1) {
527                    throw new IllegalArgumentException("Invalid leaf id " + leaf);
528                }
529                return this.ll[leaf];
530            }
531    
532            @Override
533            public double maxLevel() {
534                return this.maxLevel;
535            }
536    
537            @Override
538            public SingleLevelClustering<Integer, ? extends Cluster<Integer>> partition(double clippingLevel) {
539                // Partitioning algorithm
540                //  - DFS walk clusters from root
541                //  - (1) If cluster level is on the keep side then assign all members to a new cluster and skip descent
542                //  - (2) If cluster level is on the drop side yhen assign all leaves into singleton clusters and descent
543                //
544                //             (1)    (2)
545                //              :      :
546                //              :  +-----+
547                //              :  +-+ :
548                //             ----+   :
549                //              :  +-- :
550                //              :  +----------
551                //              :      :
552                //
553    
554    
555                final IDBasedClusterBuilder b = new IDBasedClusterBuilder();
556    
557                // List (used as FIFO) of clusters to traverse
558                // invariant: no collapse needed for the parents
559                final LinkedList<IDBasedHierarchicCluster> queue = new LinkedList<IDBasedHierarchicCluster>();
560                queue.addAll(this.hierarchy.roots());
561                while (!queue.isEmpty()) {
562                    final IDBasedHierarchicCluster c = queue.removeFirst();
563                    if ((this.ll == null && clusterLevel(c) <= clippingLevel)
564                            || (this.ll != null && clusterLevel(c) >= clippingLevel))
565                    {
566                        // cluster is collapsed
567                        // new clusters representant will be the same as the collapsed one
568                        final int cid = b.addNewCluster();
569                        b.addStructuresToCluster(c.members(), cid);
570                        b.updateRepresentant(c.representant(), cid);
571                        // no further traversal of this cluster
572                        continue;
573                    }
574                    // cluster is not collapsed, assign leaves as singletons, traverse children
575                    for (Integer i : c.leaves()) {
576                        final int cid = b.addNewCluster();
577                        b.addStructureToCluster(i, cid);
578                        b.updateRepresentant(i, cid);
579                    }
580                    // enque children
581                    queue.addAll(c.clusters());
582                }
583                return b.build();
584            }
585    
586    
587            /**
588             * Utility method to convert a List of Doubles into a primitive array.
589             *
590             * @param list  List to convert
591             * @param size  Expected size
592             * @return      Primitive array
593             * @throws IllegalArgumentException when sizes mismatch or nulls found
594             */
595            private static double [] toArray(List<Double> list, int size) {
596                if (list.size() != size) {
597                    throw new IllegalArgumentException("Mismatch list size " + list.size() + ", expected " + size);
598                }
599                final double [] ret = new double[size];
600                for (int i = 0; i < size; i++) {
601                    final Double d = list.get(i);
602                    if (d == null) {
603                        throw new IllegalArgumentException("Null element found at index " + i);
604                    }
605                    ret[i] = d;
606                }
607                return ret;
608            }
609    
610            /**
611             * Utility method to convert a List of Doubles into a primitive array.
612             *
613             * @param list  List to convert
614             * @return      Primitive array, nulls replaced with -1
615             */
616            private static double [] toSparseArray(List<Double> list) {
617                final double [] ret = new double[list.size()];
618                for (int i = 0; i < list.size(); i++) {
619                    final Double d = list.get(i);
620                    if (d == null) {
621                        ret[i] = -1;
622                    } else {
623                        ret[i] = d;
624                    }
625                }
626                return ret;
627            }
628    
629            /**
630             * Utility method to find maximal value.
631             *
632             * @param d A value
633             * @param a An array of values
634             * @return  Max value found
635             */
636            private static double max(double d, double [] a) {
637                double ret = d;
638                for (double aa : a) {
639                    if (aa > ret) {
640                        ret = aa;
641                    }
642                }
643                return ret;
644            }
645    
646            @Override
647            public double[] getDistinctClusterLevels() {
648                return Arrays.copyOf(this.distinctLevels, this.distinctLevels.length);
649            }
650    
651            @Override
652            public int[] getClusterCountsForLevels() {
653                return Arrays.copyOf(this.partitionClusterCounts, this.partitionClusterCounts.length);
654            }
655    
656        }
657    }