001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.calculations.util;
012    
013    import com.google.common.annotations.Beta;
014    import com.google.common.base.Optional;
015    import com.google.common.base.Preconditions;
016    import java.util.ArrayList;
017    import java.util.List;
018    
019    /**
020     * Simple histogram of values.
021     *
022     * @author Gabor Imre
023     */
024    @Beta
025    public class Histo {
026    
027    
028    
029        /**
030         * Minimal value observed.
031         */
032        private double min = Double.POSITIVE_INFINITY;
033    
034        /**
035         * Maximal value observed.
036         */
037        private double max = Double.NEGATIVE_INFINITY;
038    
039        /**
040         * Lower limit.
041         *
042         * The lowest value to be stored in the first bin.
043         */
044        private final double l;
045    
046        /**
047         * Upper limit.
048         *
049         * The lowest value to be stored in the bin following the last bin. The current implementation assign these values
050         * to the last bin, so the upper limit is the highest value to be put in the last bin.
051         *
052         */
053        private double h;
054    
055        /**
056         * Automatically scale bins to accomodate largest values.
057         */
058        private final boolean autoscale;
059    
060        /**
061         * Automatically allocate additional bins to accomodate largest values.
062         */
063        private final boolean autostretch;
064    
065        /**
066         * Bins.
067         */
068        private long [] counts;
069    
070        /**
071         * Number of items below the first bin.
072         */
073        private long lows;
074    
075        /**
076         * Number of items above the first bin.
077         */
078        private long highs;
079    
080        /**
081         * True when integer centered bins are used.
082         * @TODO: since wide bins can be used, rename to more appropriate name
083         */
084        private boolean intCenteredBins;
085    
086        /**
087         * For integer centered bins the value of first bin center.
088         * @TODO: firs bins lowest integer
089         */
090        private final int firstBinCenter;
091    
092        /**
093         * For integer centered bins the value of last bin center.
094         * @TODO: last bins highest integer
095         */
096        private int lastBinCenter;
097    
098        /**
099         * For integer values histogram the width of each bin.
100         */
101        private final int intBinWidth;
102    
103        /**
104         * Sum of all values added.
105         */
106        private double valuesSum = 0;
107    
108        /**
109         * Conunt of values added.
110         */
111        private long valuesCount = 0;
112    
113        /**
114         * Create a histogram containing integers.
115         * @TODO:   multiple constructors which signature differs in integer/floating point parameters; use cleaner
116         *          parametrization
117         */
118        public Histo(int minValue, int bins, int binWidth, boolean autoalloc) {
119            if (binWidth < 1) {
120                throw new IllegalArgumentException();
121            }
122            this.l = minValue - 0.5;
123            this.h = minValue + bins * binWidth - 0.5;
124            this.counts = new long[bins];
125            this.lows = 0;
126            this.highs = 0;
127            this.intCenteredBins = true;
128            this.firstBinCenter = minValue;
129            this.intBinWidth = binWidth;
130            this.lastBinCenter = minValue + bins * binWidth - 1;
131            this.autoscale = false;
132            this.autostretch = autoalloc;
133        }
134    
135        public Histo(int minValue, int bins) {
136            this(minValue, bins, 1, false);
137        }
138    
139        /**
140         * Create a histogram containing real numbers.
141         *
142         * @param l     Lowest value to be assigned to the first bin.
143         * @param h     Highest value to be assigned to the last bin.
144         * @param bins  Bin count.
145         */
146        public Histo(double l, double h, int bins, boolean autoscale, boolean autoalloc) {
147            if (autoscale && autoalloc) {
148                throw new IllegalArgumentException();
149            }
150            this.l = l;
151            this.h = h;
152            this.counts = new long[bins];
153            this.lows = 0;
154            this.highs = 0;
155            this.intCenteredBins = false;
156            this.firstBinCenter = -1;
157            this.lastBinCenter = -1;
158            this.intBinWidth = -1;
159            this.autoscale = autoscale;
160            this.autostretch = autoalloc;
161        }
162    
163        public Histo(double l, double h, int bins) {
164            this(l, h, bins, false, false);
165        }
166    
167        /**
168         * Check if the histogram is empty.
169         *
170         * @return  True when no values added yet
171         */
172        public boolean isEmpty() {
173            return this.valuesCount == 0;
174        }
175    
176        /**
177         * Add a value to the histogram.
178         *
179         * @param d Value to be added
180         *
181         * @TODO: segregate real and integer histograms
182         */
183        public void add(double d) {
184            this.valuesSum += d;
185            this.valuesCount += 1;
186    
187            if (this.autoscale && d > this.h) {
188                while (d > this.h) {
189                    this.h = this.l + (this.h - this.l) * 2;
190    
191                    // first bin
192                    this.counts[0] += this.counts[1];
193    
194                    // all other bins
195                    for (int i = 1; i < this.counts.length; i++) {
196                        this.counts[i] = 0;
197                        for (int j = 2 * i; j < this.counts.length && j < 2 * i + 2; j++) {
198                            this.counts[i] += this.counts[j];
199                        }
200                    }
201                }
202            }
203    
204            // Note that >= is used for upper limit comparison here
205            // if new bins can be allocated then the current upper bin should be not closed!
206            if (this.autostretch && d >= this.h) {
207                if (this.intCenteredBins) {
208                    final int newBins = (int) Math.ceil((d - this.h) / this.intBinWidth);
209                    final long [] newCounts = new long[this.counts.length + newBins];
210                    System.arraycopy(this.counts, 0, newCounts, 0, this.counts.length);
211                    this.counts = newCounts;
212                    this.h = this.firstBinCenter + this.counts.length * this.intBinWidth - 0.5;
213                    this.lastBinCenter = this.firstBinCenter + this.counts.length * this.intBinWidth - 1;
214                } else {
215                    final double bw = (this.h - this.l) / this.counts.length;
216                    final int newBins = (int) Math.ceil((d - this.h) / bw);
217                    final long [] newCounts = new long[this.counts.length + newBins];
218                    System.arraycopy(this.counts, 0, newCounts, 0, this.counts.length);
219                    this.counts = newCounts;
220                    this.h = this.l + this.counts.length * bw;
221                }
222            }
223    
224            if (d < this.min) {
225                this.min = d;
226            }
227            if (d > this.max) {
228                this.max = d;
229            }
230    
231            if (d < this.l) {
232                this.lows++;
233            } else if (d > this.h) {
234                this.highs++;
235            } else {
236    
237                final int idx = (int) (this.counts.length * (d - this.l) / (this.h - this.l));
238                if (idx == this.counts.length) {
239                    this.counts[this.counts.length - 1]++;
240                } else {
241                    this.counts[idx]++;
242                }
243            }
244        }
245    
246        /**
247         * Find maximum in an array.
248         *
249         * @param l Array of number
250         * @return  The maximum value found in the array
251         */
252        static long max(long [] l) {
253            Preconditions.checkArgument(l.length > 0, "Empty array");
254            long r = l[0];
255            for (long v : l) {
256                if (r < v) {
257                    r = v;
258                }
259            }
260            return r;
261        }
262    
263        /**
264         * Sum the elements of a given array.
265         *
266         * @param l Array of numbers
267         * @return  Sum of the elements of the given array
268         */
269        static long sum(long [] l) {
270            Preconditions.checkArgument(l.length > 0, "Empty array");
271            long r = 0;
272            for (long v : l) {
273                r += v;
274            }
275            return r;
276        }
277    
278        /**
279         * Check if a bin is associated with the given value.
280         *
281         * @param d Value
282         * @return  true when the value can be assigned to a specific bin
283         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
284         *              this check would depend on the subsequent value additions.
285         *
286         */
287        public boolean isInCurrentBounds(double d) {
288            if (this.autoscale) {
289                throw new UnsupportedOperationException("Autoscale histogram");
290            }
291            return d >= this.l && d <= this.h;
292        }
293    
294        /**
295         * Get bin count.
296         *
297         * @return  Count of individual bins
298         */
299        public int getBinCount() {
300            return this.counts.length;
301        }
302    
303        /**
304         * Lower bound of the histogram.
305         *
306         * @return  Lower bound of the histogram
307         *
308         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
309         *              this check would depend on the subsequent value additions.
310         */
311        public double getLowerBound() {
312            if (this.autoscale) {
313                throw new UnsupportedOperationException("Autoscale histogram");
314            }
315            return this.l;
316        }
317    
318        /**
319         * Upper bound of the histogram.
320         *
321         * @return  Upper bound of the histogram
322         *
323         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
324         *              this check would depend on the subsequent value additions.
325         */
326        public double getUpperBound() {
327            if (this.autoscale) {
328                throw new UnsupportedOperationException("Autoscale histogram");
329            }
330            return this.h;
331        }
332    
333        /**
334         * Lower bound of a bin.
335         *
336         * @param   bin Index of a bin (0-based)
337         *
338         * @return  Lower bound of a bin. This value is always inclusive, an item equals with this value is always
339         *          associated to the given bin
340         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
341         *              this check would depend on the subsequent value additions.
342         */
343        public double getBinLowerBound(int bin) {
344            if (this.autoscale) {
345                throw new UnsupportedOperationException("Autoscale histogram");
346            }
347            if (bin < 0 || bin >= this.counts.length) {
348                throw new IndexOutOfBoundsException("Invalid bin index " + bin);
349            }
350            return this.l + bin * (this.h - this.l) / this.counts.length;
351        }
352    
353        /**
354         * Upper bound of a bin.
355         *
356         * @param   bin Index of a bin (0-based)
357         *
358         * @return  Upper bound of a bin. This value is usually not inclusive, an item equals with this value is usually
359         *          associated to the next bin. Exclusion is the last bin, an item equals with its upper bound is assigned
360         *          to itself.
361         *
362         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
363         *              this check would depend on the subsequent value additions.
364         */
365        public double getBinUpperBound(int bin) {
366            if (this.autoscale) {
367                throw new UnsupportedOperationException("Autoscale histogram");
368            }
369            if (bin < 0 || bin >= this.counts.length) {
370                throw new IndexOutOfBoundsException("Invalid bin index " + bin);
371            }
372            return this.l + (bin + 1) * (this.h - this.l) / this.counts.length;
373        }
374    
375        /**
376         * Look up associated bin.
377         *
378         * @param d     A value
379         * @return      Associated bin index if not out of bounds
380         * @throws UnsupportedOperationException when the underlying histogram is auto scaled, in this case the result of
381         *              this check would depend on the subsequent value additions.
382         */
383        public Optional<Integer> getBinIndex(double d) {
384            if (this.autoscale) {
385                throw new UnsupportedOperationException("Autoscale histogram");
386            }
387            if (d < this.l || d > this.h) {
388                return Optional.<Integer>absent();
389            }
390            final int idx = (int) (this.counts.length * (d - this.l) / (this.h - this.l));
391            if (idx == this.counts.length) {
392                // Last bin's upper bound
393                return Optional.of(this.counts.length - 1);
394            } else {
395                // Otherwise correct bin index
396                return Optional.of(idx);
397            }
398        }
399    
400        /**
401         * Get the value from a bin.
402         *
403         * @param bin   Bin index
404         * @return      Value of the bin.
405         */
406        public long getBin(int bin) {
407            return this.counts[bin];
408        }
409    
410    
411        /**
412         * Append a specific number of characters to a StringBuilder.
413         *
414         * @param sb Target
415         * @param c Character to append
416         * @param count Append count.
417         * @return Reference for the passed StringBuilder
418         * @throws IllegalArgumentException for negative counts
419         */
420        private static StringBuilder append(StringBuilder sb, char c, int count) {
421            if (count < 0) {
422                throw new IllegalArgumentException("Negative count " + count);
423            }
424            for (int i = 0; i < count; i++) {
425                sb.append(c);
426            }
427            return sb;
428        }
429    
430        /**
431         * Create a given length of String from a character.
432         *
433         * @param c     Character to concatenate
434         * @param count Required String length
435         *
436         * @return      Concatenated String
437         */
438        private static String mul(char c, int count) {
439            if (count < 0) {
440                throw new IllegalArgumentException("Negative count " + count);
441            }
442            final StringBuilder sb = new StringBuilder(count);
443            for (int i = 0; i < count; i++) {
444                sb.append(c);
445            }
446            return sb.toString();
447        }
448    
449    
450        /**
451         * Compose statistics block.
452         *
453         * <p>Statistics block is a set of lines containing counts, sum, avg, etc</p>
454         *
455         * @return  List of one line Strings
456         */
457        public List<String> getStatBlock() {
458            // Block: counts, averages, statistics, etc --------------------------------------------------------------------
459            // Labels
460            final List<String> l1 = new ArrayList<String>();
461            // Values
462            final List<String> l2 = new ArrayList<String>();
463    
464            l1.add("Total values:");
465            l2.add(Long.toString(this.valuesCount));
466            l1.add("Values range:");
467            l2.add(this.min + " .. " + this.max);
468            l1.add("Sum of values:");
469            l2.add(Double.toString(this.valuesSum));
470            l1.add("Average of values:");
471            l2.add(Double.toString(this.valuesSum / this.valuesCount));
472    
473            if (this.lows > 0 || this.highs > 0) {
474                l1.add("");
475                l2.add("");
476            }
477    
478            if (this.lows > 0) {
479                l1.add("Values below first bin:");
480                l2.add(Long.toString(this.lows));
481            }
482            if (this.highs > 0) {
483                l1.add("Values above last bin:");
484                l2.add(Long.toString(this.highs));
485            }
486    
487            return CharLayout.join(l1, CharLayout.HA.LEFT, " ", l2, CharLayout.HA.LEFT);
488        }
489    
490    
491        /**
492         * Compose histogram body
493         *
494         * <p>Histogram body is the chart area, with axis labels.</p>
495         *
496         * @param rows  Number of rows in the chart area
497         *
498         * @return List of one line Strings (length is rows + 3 lines)
499         */
500        public List<String> getHistoBody(final int rows) {
501    
502            // List of Strings to build
503            final List<String> hi = new ArrayList<String>();
504    
505            // Max bin size
506            final long m = max(this.counts);
507    
508            // Label of maximal values
509            final String maxCountLabel = m + " ";
510    
511            // Left margin for each non-label lines
512            final String leftMargin = mul(' ', maxCountLabel.length());
513    
514    
515    
516            // First histogram row: upper border ---------------------------------------------------------------------------
517            final StringBuilder hborder = new StringBuilder();
518            hborder.append(leftMargin);
519            hborder.append('+');
520            append(hborder, '-', this.counts.length);
521            hborder.append('+');
522    
523            hi.add(hborder.toString());
524    
525            // Histogram body ----------------------------------------------------------------------------------------------
526            for (int y = rows; y > 0; y--) {
527                final StringBuilder histoline = new StringBuilder();
528    
529                // In the first row print max counts, in other rows print the margin
530                if (y == rows) {
531                    histoline.append(maxCountLabel);
532                } else {
533                    histoline.append(leftMargin);
534                }
535    
536    
537                final long th = m * (y - 1) / rows;
538    
539                histoline.append('|');
540                for (int x = 0; x < this.counts.length; x++) {
541                    if (this.counts[x] > th) {
542                        histoline.append('#');
543                    } else {
544                        if (y == 1) {
545                            // last row
546                            histoline.append('_');
547                        } else {
548                            histoline.append(' ');
549                        }
550    
551                    }
552                }
553                histoline.append('|');
554                hi.add(histoline.toString());
555    
556            }
557    
558            // Last histogram row: lower border ----------------------------------------------------------------------------
559            hi.add(hborder.toString());
560    
561    
562            // Histogram label line ----------------------------------------------------------------------------------------
563            // Left label to print
564            final String ll;
565    
566            // Righr label to print
567            final String rl;
568            if (this.intCenteredBins) {
569                if (this.intBinWidth > 1) {
570                    ll = this.firstBinCenter + " - " + (this.firstBinCenter + this.intBinWidth - 1);
571                    rl = (this.lastBinCenter - this.intBinWidth + 1) + " - " + this.lastBinCenter;
572                } else {
573                    // Bin width is 1
574                    ll = Integer.toString(this.firstBinCenter);
575                    rl = Integer.toString(this.lastBinCenter);
576                }
577            } else {
578                ll = Double.toString(this.l);
579                rl = Double.toString(this.h);
580            }
581    
582            final StringBuilder labelline = new StringBuilder();
583            labelline.append(leftMargin);
584            labelline.append(' ');
585            labelline.append(ll);
586            append(labelline, ' ', this.counts.length - ll.length() - rl.length());
587            labelline.append(rl);
588            hi.add(labelline.toString());
589    
590            return hi;
591        }
592    
593        /**
594         * Compose the list of smallest bins block.
595         *
596         * <p>Smallest bins are the bins, which presented with a minimal height bar and contains fewer than 50% of the
597         * values represented by the associated bar.</p>
598         *
599         * @param rows  Number of rows used to render the histogram
600         *
601         * @return  List of Strings
602         */
603        public List<String> getSmallestBinsBlock(int rows) {
604    
605            final long threshold = max(this.counts) / (2 * (long) rows);
606    
607            if (threshold == 0) {
608                // No smallest bins
609                return new ArrayList<String>();
610            }
611    
612            // Labels
613            final List<String> l3 = new ArrayList<String>();
614    
615            // Values
616            final List<String> l4 = new ArrayList<String>();
617    
618            for (int i = 0; i < this.counts.length; i++) {
619                if (this.counts[i] > threshold || this.counts[i] == 0) {
620                    continue;
621                }
622    
623                // TODO: extract bin label to string function
624    
625                if (this.intCenteredBins) {
626                    if (this.intBinWidth > 1) {
627                        // use intervals
628                        final StringBuilder bl = new StringBuilder();
629                        bl.append(this.firstBinCenter + i * this.intBinWidth);
630                        bl.append(" - ");
631                        bl.append(this.firstBinCenter + (i + 1) * this.intBinWidth - 1);
632                        l3.add(bl.toString());
633                    } else {
634                        l3.add(Long.toString(this.firstBinCenter + i));
635                    }
636                } else {
637                    final StringBuilder bl = new StringBuilder();
638                    bl.append(i * (this.h - this.l) / this.counts.length).append(" - ");
639                    bl.append((i + 1) * (this.h - this.l) / this.counts.length);
640                    l3.add(bl.toString());
641                }
642    
643                l4.add(Long.toString(this.counts[i]));
644            }
645    
646            if (l3.isEmpty()) {
647                // no smallest bins
648                return new ArrayList<String>();
649            } else {
650                final List<String> ret = new ArrayList<String>();
651                ret.add("Smallest bins:");
652                ret.add("");
653    
654                final List<String> sb = CharLayout.join(
655                        CharLayout.prefix("    ", l3),
656                        CharLayout.HA.RIGHT,
657                        ": ",
658                        l4,
659                        CharLayout.HA.RIGHT);
660    
661                ret.addAll(sb);
662                return ret;
663            }
664    
665    
666        }
667    
668    
669    
670        /**
671         * Create a String representation.
672         *
673         * @param   rows            Rows to use for the histogram area
674         * @param   listSmallBins   List of smallest bins to be printed
675         *
676         * @return  String representation
677         */
678        public List<String> toStrings(int rows, boolean listSmallBins) {
679            final List<String> histo = getHistoBody(rows);
680            final List<String> rc = getStatBlock();
681    
682            if (listSmallBins) {
683                final List<String> sb = getSmallestBinsBlock(rows);
684                if (!sb.isEmpty()) {
685                    rc.add("");
686                    rc.addAll(sb);
687                }
688            }
689    
690            final List<String> ret = CharLayout.join(histo, CharLayout.HA.LEFT, " ", rc, CharLayout.HA.LEFT);
691            return ret;
692    
693        }
694    
695        public String toString(int rows, boolean listSmallBins) {
696            return CharLayout.toString(toStrings(rows, listSmallBins));
697        }
698    
699        public List<String> toStrings() {
700            return toStrings(30, true);
701        }
702    
703        @Override
704        public String toString() {
705            return toString(30, true);
706        }
707    
708    }
709