001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.overlap.bruteforce;
012    
013    import chemaxon.license.LicenseGlobals;
014    import chemaxon.license.LicenseHandler;
015    import com.chemaxon.calculations.common.ProgressObservers;
016    import com.chemaxon.calculations.common.SubProgressObserver;
017    import com.chemaxon.descriptors.common.Descriptor;
018    import com.chemaxon.overlap.KnnResults;
019    import com.chemaxon.overlap.KnnResultsImpl;
020    import com.chemaxon.overlap.SimilarityResultImpl;
021    import com.chemaxon.overlap.SimilarityResultNode;
022    import com.chemaxon.overlap.SimilarityResults;
023    import com.chemaxon.overlap.SimilarityResultsImpl;
024    import com.chemaxon.overlap.concurrent.BatchedPagedProcessor;
025    import com.chemaxon.overlap.concurrent.ErrorHandler;
026    import com.chemaxon.overlap.concurrent.ErrorHandlers;
027    import com.chemaxon.overlap.io.Util;
028    import com.chemaxon.overlap.unguarded.UnguardedDissimilarityCalculator;
029    import com.chemaxon.overlap.unguarded.UnguardedVisitor;
030    import com.chemaxon.overlap.unguarded.Unguardeds;
031    import com.google.common.base.Function;
032    import com.google.common.collect.AbstractIterator;
033    import com.google.common.collect.ImmutableList;
034    import com.google.common.collect.UnmodifiableIterator;
035    import com.google.common.util.concurrent.MoreExecutors;
036    import java.io.Serializable;
037    import java.lang.management.ManagementFactory;
038    import java.util.ArrayList;
039    import java.util.Arrays;
040    import java.util.List;
041    import java.util.concurrent.Callable;
042    import java.util.concurrent.CancellationException;
043    import java.util.concurrent.ExecutionException;
044    import java.util.concurrent.ExecutorCompletionService;
045    import java.util.concurrent.ExecutorService;
046    import java.util.concurrent.Future;
047    import javax.management.InstanceAlreadyExistsException;
048    import javax.management.MBeanRegistrationException;
049    import javax.management.MBeanServer;
050    import javax.management.MalformedObjectNameException;
051    import javax.management.NotCompliantMBeanException;
052    import javax.management.ObjectName;
053    
054    /**
055     * Brute force paged similarity search.
056     *
057     * <p>This class is an immutable similarity query engine implementation. The query works on unguarded descriptors
058     * for storage and computational efficiency.</p>
059     *
060     * <p>
061     * Licensing: this class can be used with valid {@link LicenseGlobals#OVERLAP} license.</p>
062     *
063     * @param <T>   Unguarded form of the descriptors
064     *
065     * @author Gabor Imre
066     */
067    public final class UnguardedPagedOverlap<T extends Serializable> implements Serializable {
068    
069        /**
070         * Serial version.
071         */
072        private static final long serialVersionUID = 0;
073    
074        /**
075         * Number of pages to group when processing single query.
076         */
077        public static final int PAGES_GROUP_SIZE_FOR_SINGLE_QUERY = 1000;
078    
079        /**
080         * Page storage.
081         *
082         * <p>All but the last page is expected to have the same size.
083         */
084        private final ImmutableList<ImmutableList<T>> pages;
085    
086        /**
087         * Descriptor comparator to use.
088         */
089        private final UnguardedDissimilarityCalculator<T> comparator;
090    
091        /**
092         * Total number of descriptors stored.
093         */
094        private final int size;
095    
096        /**
097         * Construct new immutable reference from prepared storage.
098         *
099         * @param comparator Comparator to be used
100         * @param pages      List of pages
101         * @param size Total number of descriptors
102         * @throws chemaxon.license.LicenseException when appropriate license is not available
103         */
104        public UnguardedPagedOverlap(
105                UnguardedDissimilarityCalculator<T> comparator,
106                ImmutableList<ImmutableList<T>> pages,
107                int size) {
108            LicenseHandler.getInstance().checkLicense(LicenseGlobals.OVERLAP);
109            this.comparator = comparator;
110            this.pages = pages;
111            this.size = size;
112        }
113    
114        /**
115         * Construct new immutable reference from pages of {@link Descriptor} instances.
116         *
117         * @param <D> Type of descriptors to transform
118         * @param extractor {@link Function} to extract unguarded descriptor content for storage
119         * @param comparator Unguarded comparator to be represented by the constructed instance
120         * @param pages List of descriptor pages. This parameter can not be <code>null</code>.
121         * @param page Additional descriptor page, considered as last page. Ignored if <code>null</code> given.
122         * @throws chemaxon.license.LicenseException when appropriate license is not available
123         */
124        public <D extends Descriptor> UnguardedPagedOverlap(
125                Function<D, T> extractor,
126                UnguardedDissimilarityCalculator<T> comparator,
127                List<ImmutableList<D>> pages,
128                List<D> page) {
129    
130            LicenseHandler.getInstance().checkLicense(LicenseGlobals.OVERLAP);
131    
132            // remember comparator
133            this.comparator = comparator;
134    
135            // Builder for pages storage
136            final ImmutableList.Builder<ImmutableList<T>> pagesBuilder = new ImmutableList.Builder<ImmutableList<T>>();
137    
138            for (ImmutableList<D> p : pages) {
139                pagesBuilder.add(Unguardeds.transformPage(extractor, p));
140            }
141    
142            if (page != null && !page.isEmpty()) {
143                pagesBuilder.add(Unguardeds.transformPage(extractor, page));
144            }
145    
146            this.pages = pagesBuilder.build();
147    
148            // calculate size
149            int s = 0;
150            for (List<?> l : this.pages) {
151                s += l.size();
152            }
153            this.size = s;
154        }
155    
156        /**
157         * Construct another <code>BruteForcePagedSimilarity</code> instance representing a different comparator.
158         *
159         * <p>This operation is memory efficient, since page storage is immutable thus reused.</p>
160         *
161         * @param comparator Comparator to be used
162         * @return           Instance representing comparison with the given comparator
163         */
164        public UnguardedPagedOverlap<T> withComparator(UnguardedDissimilarityCalculator<T> comparator) {
165            return new UnguardedPagedOverlap<T>(comparator, this.pages, this.size);
166        }
167    
168        /**
169         * Total number of descriptors stored.
170         *
171         * @return  Size
172         */
173        public int size() {
174            return this.size;
175        }
176    
177        /**
178         * Traverse storage on single thread.
179         *
180         * <p>
181         * Note that the callback must not modify passed descriptors.</p>
182         *
183         * @param visitor Callback to invoke
184         * @param po ProgressObserver to track progress. Upon completion {@link SubProgressObserver#done()} will be invoked.
185         */
186        public void traverse(UnguardedVisitor<T> visitor, SubProgressObserver po) {
187            po.switchToDeterminate(size());
188            try {
189                int index = 0;
190                for (ImmutableList<T> page : this.pages) {
191                    visitor.visitDescriptors(index, page);
192                    index += page.size();
193                    po.worked(page.size());
194                }
195            } finally {
196                po.done();
197            }
198        }
199    
200        /**
201         * Iterate descriptors.
202         *
203         * @return Iterator for stored descriptors
204         */
205        UnmodifiableIterator<T> iterateDescriptors() {
206            final UnmodifiableIterator<ImmutableList<T>> pagesIterator = this.pages.iterator();
207    
208            // ensure that pages are not empty
209            if (!pagesIterator.hasNext()) {
210                throw new IllegalStateException();
211            }
212    
213            return new AbstractIterator<T>() {
214    
215                /**
216                 * Current, possibly partially processed page
217                 */
218                private UnmodifiableIterator<T> currentPageIterator = pagesIterator.next().iterator();
219    
220                @Override
221                protected T computeNext() {
222    
223                    if (currentPageIterator.hasNext()) {
224                        return currentPageIterator.next();
225                    } else {
226                        // current page has no next
227                        if (!pagesIterator.hasNext()) {
228                            return endOfData();
229                        } else {
230                            currentPageIterator = pagesIterator.next().iterator();
231                            if (currentPageIterator.hasNext()) {
232                                return currentPageIterator.next();
233                            } else {
234                                throw new IllegalStateException("Empty page");
235                            }
236                        }
237                    }
238                }
239            };
240    
241        }
242    
243        /**
244         * Iterate descriptors.
245         *
246         * @param pagesize Number of descriptors to page together
247         * @return Iterator
248         */
249        UnmodifiableIterator<ImmutableList<T>> iterateDescriptors(final int pagesize) {
250            final UnmodifiableIterator<ImmutableList<T>> pagesIterator = this.pages.iterator();
251    
252            // ensure that pages are not empty
253            if (!pagesIterator.hasNext()) {
254                throw new IllegalStateException();
255            }
256    
257            return new AbstractIterator<ImmutableList<T>>() {
258    
259                /**
260                 * Current, possibly partially processed page
261                 */
262                private UnmodifiableIterator<T> currentPageIterator = pagesIterator.next().iterator();
263    
264                @Override
265                protected ImmutableList<T> computeNext() {
266    
267                    if (!currentPageIterator.hasNext() && !pagesIterator.hasNext()) {
268                        return endOfData();
269                    }
270    
271                    final ImmutableList.Builder<T> ret = new ImmutableList.Builder<T>();
272                    for (int i = 0; i < pagesize; i++) {
273                        if (!currentPageIterator.hasNext() && pagesIterator.hasNext()) {
274                            currentPageIterator = pagesIterator.next().iterator();
275                        }
276                        if (currentPageIterator.hasNext()) {
277                            ret.add(currentPageIterator.next());
278                        } else {
279                            break;
280                        }
281                    }
282    
283                    return ret.build();
284                }
285            };
286        }
287    
288        /**
289         * Retrieve descriptor as String.
290         *
291         * <p>
292         * This operation is recommended for debug only. Execution might be slow.</p>
293         *
294         * @param index Index of descriptor
295         * @param recognizeArrays Recognize arrays and traverse its elements
296         * @return Descriptor as String
297         */
298        public String getDescriptorAsString(int index, boolean recognizeArrays) {
299            int b = 0;
300            for (ImmutableList<T> page : this.pages) {
301                if (b + page.size() > index) {
302                    final Object d = page.get(index - b);
303                    if (recognizeArrays) {
304                        return Util.arrayAwareToString(d);
305                    } else {
306                        return d.toString();
307                    }
308                }
309                b += page.size();
310            }
311            throw new IllegalArgumentException("Index not found " + index);
312        }
313    
314        /**
315         * Find most similar structures for a single query.
316         *
317         * <p>
318         * This method utilizes concurrent execution and blocks until completion. ProgressObserver callback is invoked from
319         * the calling thread only.</p> \
320         *
321         * @param query Query descriptor
322         * @param count Number of expected most similar structures
323         * @param po ProgressObserver to track progress. Upon completion {@link SubProgressObserver#done()} will be invoked.
324         * @param ex ExecutorService to run workers
325         * @return List of most similar structures, ordered by increasing dissimilarity order
326         * @throws CancellationException when cancelled through supplied progress observer
327         */
328        public ImmutableList<SimilarityResultNode> findMostSimilars(
329                final T query, final int count, final SubProgressObserver po, final ExecutorService ex) {
330    
331            po.switchToDeterminate(this.size);
332            try {
333                // Pages to search
334                final UnmodifiableIterator<ImmutableList<T>> pageIterator = this.pages.iterator();
335    
336                // Final result
337                final Dissimilarities finalResult = new Dissimilarities(count);
338    
339                // Queue
340                final ExecutorCompletionService<SearchMostSimilars<T>> cs
341                        = new ExecutorCompletionService<SearchMostSimilars<T>>(ex);
342    
343                // Number of tasks in the executor service
344                int pendingCt = 0;
345    
346                // First id to submit
347                int firstId = 0;
348    
349                // Loop through pages
350                while (pendingCt > 0 || pageIterator.hasNext()) {
351    
352                    if (pageIterator.hasNext()) {
353                        // remember first descriptor id to submit
354                        final int firstIdToSubmit = firstId;
355    
356                        // Group of pages to submit as new work unit
357                        final List<ImmutableList<T>> pgroup
358                                = new ArrayList<ImmutableList<T>>(PAGES_GROUP_SIZE_FOR_SINGLE_QUERY);
359    
360                        // fill group
361                        for (int i = 0; i < PAGES_GROUP_SIZE_FOR_SINGLE_QUERY && pageIterator.hasNext(); i++) {
362                            final ImmutableList<T> page = pageIterator.next();
363                            pgroup.add(page);
364                            firstId += page.size();
365                        }
366    
367                        cs.submit(new SearchMostSimilars<T>(this.comparator, query, pgroup, firstIdToSubmit, count));
368    
369                        pendingCt++;
370    
371                    }
372                    Future<SearchMostSimilars<T>> r;
373                    while ((r = cs.poll()) != null) {
374                        finalResult.mergeSorted(r.get());
375                        pendingCt--;
376                        po.worked(r.get().getTotalTargetCount());
377                    }
378                }
379    
380                return finalResult.toList();
381    
382            } catch (InterruptedException e) {
383                throw new IllegalStateException(e);
384            } catch (ExecutionException e) {
385                throw new IllegalStateException(e);
386            } finally {
387                po.done();
388            }
389    
390        }
391    
392        /**
393         * Calculate full dissimilarity matrix.
394         *
395         * <p>
396         * Calculate full dissimilarity matrix between the represented targets and the given queries. Note that the
397         * resulting matrix might have be excessively large size.</p>
398         *
399         * <p>
400         * The resulting structure is allocated upon startup.</p>
401         *
402         * @param queries Query descriptors
403         * @param po ProgressObserver to track progress. Upon completion {@link SubProgressObserver#done()} will be invoked.
404         * @param ex ExecutorService to run workers
405         * @param managed if true, use JMX for monitoring
406         * @return List of dissimilarity values for each query. Element <code>i</code> will contain the dissimilarity vector
407         * for query <code>i</code>. The dissimilarity vector contains the dissimilarities of targets.     *
408         * @throws CancellationException when cancelled through supplied progress observer
409         * @throws IllegalStateException possible exceptions due to JMX connection are wrapped to
410         *  {@link IllegalStateException}
411         */
412        public ImmutableList<double[]> calculateFullMatrix(
413                final List<T> queries, final SubProgressObserver po, final ExecutorService ex, boolean managed) {
414    
415            po.switchToDeterminate(this.size * queries.size());
416    
417            // Allocate results
418            final double[][] res = new double[queries.size()][this.size];
419    
420            final ErrorHandler<ImmutableList<T>> eh = ErrorHandlers.<ImmutableList<T>>errorPropagatingHandler();
421            final FullMatrix.Calc<T> calc = new FullMatrix.Calc<T>(queries, this.comparator);
422            final FullMatrix.Report<T> rep = new FullMatrix.Report<T>(res);
423    
424            if (managed) {
425                try {
426                final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
427                final ObjectName on = new ObjectName("com.chemaxon:type=FullMatrix");
428                BatchedPagedProcessor.<ImmutableList<T>, double[][]>processManaged(
429                        mbs, on,
430                        1, // pagesize
431                        1000, // maxqueue
432                        po,
433                        MoreExecutors.listeningDecorator(ex),
434                        this.pages.iterator(),
435                        calc,
436                        eh,
437                        rep);
438                } catch (InstanceAlreadyExistsException iae) {
439                    throw new IllegalStateException(iae);
440                } catch (MBeanRegistrationException mb) {
441                    throw new IllegalStateException(mb);
442                } catch (NotCompliantMBeanException nc) {
443                    throw new IllegalStateException(nc);
444                } catch (MalformedObjectNameException ma) {
445                    throw new IllegalStateException(ma);
446                }
447            } else {
448                BatchedPagedProcessor.<ImmutableList<T>, double[][]>process(
449                        1, // pagesize
450                        1000, // maxqueue
451                        po,
452                        ex,
453                        this.pages.iterator(),
454                        calc,
455                        eh,
456                        rep);
457            }
458            return ImmutableList.copyOf(res);
459    
460        }
461    
462        /**
463         * Brute force find kNN among the contents of the represented descriptors.
464         *
465         * @param k Number of nearest neighbors to find
466         * @param queriesGroup Number of queries to group
467         * @param po ProgressObserver to track progress.
468         * @param ex ExecutorService to run workers. Upon completion {@link SubProgressObserver#done()} will be invoked.
469         * @return Storage of knn found. Note that self ids are not part of the knn lists,
470         * @throws java.util.concurrent.ExecutionException Re-theown
471         */
472        public KnnResults calculateSelfKnn(
473                final int k,
474                final int queriesGroup,
475                final SubProgressObserver po,
476                final ExecutorService ex) throws ExecutionException {
477    
478            // we will invoke approximately |Q|^2 descriptor comparisons
479            po.switchToDeterminate((long) size() * size());
480            try {
481                // allocate results
482                final int[] q = new int[size()];
483                final int[] t = new int[size() * k];
484                final double[] d = new double[t.length];
485    
486                // Iterate through queries
487                //final UnmodifiableIterator<ImmutableList<T>> queryIterator = iterateDescriptors(queriesGroup);
488                final UnmodifiableIterator<T> queryIterator = iterateDescriptors();
489    
490                // We wont follow individual searches
491                final SubProgressObserver npo = ProgressObservers.createForgivingNullObserver();
492    
493                // Next query index
494                int nextQ = 0;
495                while (queryIterator.hasNext()) {
496                    final T cq = queryIterator.next();
497    
498                    // Find k+1
499                    final List<SimilarityResultNode> res = findMostSimilars(cq, k + 1, npo, ex);
500    
501                    if (res.size() != k + 1) {
502                        throw new IllegalStateException();
503                    }
504    
505                    q[nextQ] = nextQ;
506    
507                    int p = 0;
508                    for (int i = 0; i < k; i++) {
509                        if (res.get(p).getTargetID() == nextQ) {
510                            p++;
511                            continue;
512                        }
513                        t[nextQ * k + i] = res.get(p).getTargetID();
514                        d[nextQ * k + i] = res.get(p).getDissimiliarity();
515                        p++;
516                    }
517    
518                    nextQ += 1;
519                    po.worked((long) this.size() * 1);
520                }
521    
522                return new KnnResultsImpl(k, q, t, d);
523            } finally {
524                po.done();
525            }
526        }
527    
528        /**
529         * Brute force find most similar structure for a set of structures.
530         *
531         * <p>
532         * For each query index the most similar target index is recorded.</p>
533         *
534         * @param queries Query descriptors
535         * @param queriesGroup Number of queries to group
536         * @param po ProgressObserver to track progress. Upon completion {@link SubProgressObserver#done()} will be invoked.
537         * @param ex ExecutorService to run workers
538         * @return Storage of most similars found
539         */
540        public SimilarityResults findMostSimilar(
541                final UnguardedPagedOverlap<T> queries,
542                final int queriesGroup,
543                final SubProgressObserver po,
544                final ExecutorService ex
545        ) {
546            // we will invoke |Q||T| comparisons
547            po.switchToDeterminate((long) this.size() * queries.size());
548            try {
549                // Allocate results
550                final int[] q = new int[queries.size()];
551                final int[] t = new int[queries.size()];
552                final double[] d = new double[queries.size()];
553    
554                // Iterate through queries
555                final UnmodifiableIterator<ImmutableList<T>> queryIterator = queries.iterateDescriptors(queriesGroup);
556    
557                // We wont follow individual searches
558                final SubProgressObserver npo = ProgressObservers.createForgivingNullObserver();
559    
560                // Next query index
561                int nextQ = 0;
562                while (queryIterator.hasNext()) {
563                    final ImmutableList<T> cq = queryIterator.next();
564    
565                    // Launch search
566                    final ImmutableList<SimilarityResultNode> res = findMostSimilar(cq, npo, ex);
567    
568                    // Store results
569                    for (int i = 0; i < cq.size(); i++) {
570                        q[nextQ + i] = nextQ + i;
571                        t[nextQ + i] = res.get(i).getTargetID();
572                        d[nextQ + i] = res.get(i).getDissimiliarity();
573                    }
574    
575                    nextQ += cq.size();
576                    po.worked((long) this.size() * cq.size());
577                }
578    
579    
580                return new SimilarityResultsImpl(q, t, d);
581            } finally {
582                po.done();
583            }
584        }
585    
586        /**
587         * Brute force find most similar structure for a set of structures.
588         *
589         * <p>This method utilizes concurrent execution and blocks until completion. ProgressObserver callback is invoked
590         * from the calling thread only.</p>
591         *
592         * <p>Note that when the best dissimiliraty score is associated for multiple queries one of them is picked. The
593         * selection in this case is non deterministic.</p>
594         *
595         * @param queries Query descriptors (in the unguarded form)
596         * @param po ProgressObserver to track progress. Upon completion {@link SubProgressObserver#done()} will be invoked.
597         * @param ex ExecutorService to run workers
598         * @return List of most similar nodes associated to the query structures
599         * @throws CancellationException when cancelled through supplied progress observer
600         */
601        public ImmutableList<SimilarityResultNode> findMostSimilar(
602                final List<T> queries, final SubProgressObserver po, final ExecutorService ex) {
603    
604    
605            po.switchToDeterminate((long) this.size * queries.size());
606            try {
607                final UnmodifiableIterator<ImmutableList<T>> pageIterator = this.pages.iterator();
608    
609                final Dissimilarities finalResult = new Dissimilarities(queries.size());
610    
611                final ExecutorCompletionService<SearchPage<T>> cs = new ExecutorCompletionService<SearchPage<T>>(ex);
612    
613                int pendingCt = 0;
614                int firstId = 0;
615                while (pendingCt > 0 || pageIterator.hasNext()) {
616                    if (pageIterator.hasNext()) {
617                        final List<T> page = pageIterator.next();
618                        cs.submit(new SearchPage<T>(this.comparator, queries, page, firstId));
619                        firstId += page.size();
620                        pendingCt++;
621    
622                    }
623                    Future<SearchPage<T>> r;
624                    while ((r = cs.poll()) != null) {
625                        finalResult.mergeDistinct(r.get());
626                        pendingCt--;
627                        po.worked(r.get().getPageSize() * r.get().getQueriesCount());
628                    }
629                }
630    
631                return finalResult.toList();
632            } catch (InterruptedException e) {
633                throw new IllegalStateException(e);
634            } catch (ExecutionException e) {
635                throw new IllegalStateException(e);
636            } finally {
637                po.done();
638            }
639    
640        }
641    
642        /**
643         * Search multiple pages for most n-most similar structures.
644         *
645         * @param <T> Type of unguarded descriptor
646         */
647        static class SearchMostSimilars<T> extends Dissimilarities implements Callable<SearchMostSimilars<T>> {
648            /**
649             * Underlying comparator.
650             */
651            final UnguardedDissimilarityCalculator<T> comparator;
652    
653            /**
654             * Targets.
655             */
656            final List<ImmutableList<T>> pages;
657    
658            /**
659             * Query.
660             */
661            final T query;
662    
663            /**
664             * ID of the first target in {@link #pages}.
665             */
666            final int firstId;
667    
668            /**
669             * Number of total pages size.
670             */
671            int totalPagesSize = -1;
672    
673            /**
674             * Construct task
675             *
676             * @param comparator Underlying comparator
677             * @param query Query descriptor to search
678             * @param pages Consecutive pages to search
679             * @param firstId ID of the first descriptor of the first page
680             * @param size Number of most similars to store
681             */
682            SearchMostSimilars(
683                    UnguardedDissimilarityCalculator<T> comparator,
684                    T query, List<ImmutableList<T>> pages, int firstId, int size) {
685                super(size);
686                this.pages = pages;
687                this.query = query;
688                this.comparator = comparator;
689                this.firstId = firstId;
690            }
691    
692            @Override
693            public SearchMostSimilars<T> call() throws Exception {
694                this.totalPagesSize = 0;
695                int id = this.firstId;
696                for (ImmutableList<T> page : this.pages) {
697                    for (T target : page) {
698                        final double d = this.comparator.dissimilarity(target, this.query);
699                        insertSorted(d, id);
700                        id++;
701                        this.totalPagesSize++;
702                    }
703                }
704    
705                return this;
706            }
707    
708            /**
709             * Get number of total descriptors compared.
710             *
711             * @return Total targets size
712             * @throws IllegalStateException when no {@link #call()} invoked before
713             */
714            public int getTotalTargetCount() {
715                if (this.totalPagesSize < 0) {
716                    throw new IllegalStateException();
717                }
718    
719                return this.totalPagesSize;
720            }
721    
722    
723        }
724    
725        /**
726         * Search one page of descriptors against a set of queries.
727         *
728         * @param <T> Type of unguarded descriptor
729         */
730        static class SearchPage<T> extends Dissimilarities implements Callable<SearchPage<T>> {
731    
732            /**
733             * Underlying comparator.
734             */
735            final UnguardedDissimilarityCalculator<T> comparator;
736    
737            /**
738             * Targets.
739             */
740            final List<T> page;
741    
742            /**
743             * Queries.
744             */
745            final List<T> queries;
746    
747            /**
748             * ID of the first target in {@link #page}.
749             */
750            final int firstId;
751    
752            /**
753             * Construct.
754             *
755             * @param comparator Underlying comparator
756             * @param queries Queries to search
757             * @param page Page of targets
758             * @param firstId ID of the first target
759             */
760            SearchPage(UnguardedDissimilarityCalculator<T> comparator, List<T> queries, List<T> page, int firstId) {
761                super(queries.size());
762                this.page = page;
763                this.queries = queries;
764                this.comparator = comparator;
765                this.firstId = firstId;
766            }
767    
768            /**
769             * Number of represented targets.
770             *
771             * @return target count
772             */
773            public int getPageSize() {
774                return this.page.size();
775            }
776    
777            /**
778             * Number of queries.
779             *
780             * @return query count
781             */
782            public int getQueriesCount() {
783                return this.queries.size();
784            }
785    
786            @Override
787            public SearchPage<T> call() throws Exception {
788                int id = this.firstId;
789                for (T target : this.page) {
790                    for (int i = 0; i < this.idArray.length; i++) {
791                        final double d = this.comparator.dissimilarity(target, this.queries.get(i));
792    
793                        if (this.dissimArray[i] > d) {
794                            this.dissimArray[i] = d;
795                            this.idArray[i] = id;
796                        }
797    
798                    }
799                    id++;
800                }
801                return this;
802            }
803        }
804    
805    
806        /**
807         * Single threaded reference for multi query most similar lookup.
808         *
809         * @param queries   Queries to search
810         * @param po        Observer; will be closed upon finish/abort/error. One work unit is assigned to one comparison.
811         * @return          Results
812         * @throws CancellationException    When cancelled through supplied progress observer
813         */
814        public ImmutableList<SimilarityResultNode> findMostSimilarOnSingleThread(
815                final List<T> queries, final SubProgressObserver po) throws CancellationException {
816    
817            po.switchToDeterminate((long) this.size * queries.size());
818            try {
819    
820                int id = 0;
821    
822                final int [] bestids = new int[queries.size()];
823                final double [] bestdissims = new double[queries.size()];
824                Arrays.fill(bestdissims, Double.MAX_VALUE);
825    
826                for (List<T> page : this.pages) {
827                    for (T target : page) {
828                        for (int i = 0; i < bestids.length; i++) {
829    
830                            final double d = this.comparator.dissimilarity(target, queries.get(i));
831    
832                            if (bestdissims[i] > d) {
833                                bestdissims[i] = d;
834                                bestids[i] = id;
835                            }
836    
837                            /*
838                            if (ret[i] == null || ret[i].getDissimiliarity() > d) {
839                                ret[i] = new SimilarityResultImpl(id, d);
840                            }
841                            */
842                        }
843                        id++;
844                        po.worked(bestids.length);
845                    }
846                }
847    
848                final ImmutableList.Builder<SimilarityResultNode> ret = new ImmutableList.Builder<SimilarityResultNode>();
849                for (int i = 0; i < bestids.length; i++) {
850                    ret.add(new SimilarityResultImpl(bestids[i], bestdissims[i]));
851                }
852                return ret.build();
853    
854                //final SimilarityResultNode[] ret = new SimilarityResultNode[queries.size()];
855                //return ImmutableList.copyOf(ret);
856            } finally {
857                po.done();
858            }
859        }
860    
861    
862        /**
863         * Find most similar for a given query structure.
864         *
865         * <p><b>This method is intended only for test/diagnostincs.</b> Users of this API usually need to invoke
866         * {@link #findMostSimilar(java.util.List, com.chemaxon.calculations.common.SubProgressObserver, java.util.concurrent.ExecutorService)}
867         * instead.</p>
868         *
869         * <p>This method blocks until ready and uses a single (the calling) thread to do the calculation.</p>
870         *
871         * @param query     Query descriptor
872         * @param po        Progress observer to track progress. Completion is reported by invoking
873         *                  {@link SubProgressObserver#done()} upon completion, cancellation or error
874         * @return          The most similar structure found
875         * @throws CancellationException upon cancellation through the given progress observer
876         */
877        public SimilarityResultNode findMostSimilarOnSingleThread(
878                final T query, final SubProgressObserver po) throws CancellationException {
879    
880            po.switchToDeterminate(this.size);
881            try {
882                // ID of next descriptor to compare
883                int id = 0;
884    
885                int bestid = -1;
886                double bestdissim = Double.MAX_VALUE;
887    
888                for (List<T> page : this.pages) {
889                    for (T target : page) {
890                        final double d = this.comparator.dissimilarity(target, query);
891    
892                        if (d < bestdissim) {
893                            bestdissim = d;
894                            bestid = id;
895                        }
896    
897                        id++;
898                    }
899                    po.worked(page.size());
900                }
901                return new SimilarityResultImpl(bestid, bestdissim);
902            } finally {
903                po.done();
904            }
905        }
906    
907    
908        /**
909         * Find most similars for a given query structure.
910         *
911         * <p><b>This method is intended only for test/diagnostincs.</b> Users of this API usually need to invoke
912         * {@link #findMostSimilar(java.util.List, com.chemaxon.calculations.common.SubProgressObserver, java.util.concurrent.ExecutorService)}
913         * instead.</p>
914         *
915         * <p>This method blocks until ready and uses a single (the calling) thread to do the calculation.</p>
916         *
917         * @param query     Query descriptor
918         * @param po        Progress observer to track progress. Completion is reported by invoking
919         *                  {@link SubProgressObserver#done()} upon completion, cancellation or error
920         * @param maxCount  Max results count
921         * @return          List of at most the given number of most similars, in increasing dissimilarity order
922         * @throws CancellationException upon cancellation through the given progress observer
923         */
924        public ImmutableList<SimilarityResultNode> findMostSimilarsOnSingleThread(
925                final T query, final SubProgressObserver po, final int maxCount) throws CancellationException {
926    
927            po.switchToDeterminate(this.size);
928    
929            try {
930                final List<SimilarityResultNode> results = new ArrayList<SimilarityResultNode>(maxCount + 1);
931    
932                int id = 0;
933    
934                for (List<T> page : this.pages) {
935                    for (T target : page) {
936                        final double d = this.comparator.dissimilarity(target, query);
937    
938                        // Avoid accessing possible expensive list operation
939                        if (id == 0) {
940                            // Empty list, insert first
941                            results.add(new SimilarityResultImpl(id, d));
942                        } else if (id < maxCount || results.get(results.size() - 1).getDissimiliarity() > d) {
943                            // New item has place in results
944    
945                            // find the place for the next element
946                            for (int i = 0; i < results.size(); i++) {
947                                if (results.get(i).getDissimiliarity() > d) {
948                                    // d should be placed at position i
949                                    results.add(i, new SimilarityResultImpl(id, d));
950                                    break;
951                                }
952                            }
953                            if (results.size() > maxCount) {
954                                results.remove(results.size() - 1);
955                            }
956                        }
957                    }
958    
959                    id++;
960                    po.worked(page.size());
961                }
962                return ImmutableList.copyOf(results);
963            } finally {
964                po.done();
965            }
966        }
967    }