001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.overlap.bruteforce;
012    
013    import chemaxon.license.LicenseGlobals;
014    import chemaxon.license.LicenseHandler;
015    import com.chemaxon.calculations.common.SubProgressObserver;
016    import com.chemaxon.descriptors.common.Descriptor;
017    import com.chemaxon.descriptors.common.DescriptorComparator;
018    import com.chemaxon.overlap.SimilarityResultImpl;
019    import com.chemaxon.overlap.SimilarityResultNode;
020    import com.google.common.collect.ImmutableList;
021    import com.google.common.collect.UnmodifiableIterator;
022    import java.util.ArrayList;
023    import java.util.Arrays;
024    import java.util.List;
025    import java.util.concurrent.Callable;
026    import java.util.concurrent.CancellationException;
027    import java.util.concurrent.ExecutionException;
028    import java.util.concurrent.ExecutorCompletionService;
029    import java.util.concurrent.ExecutorService;
030    import java.util.concurrent.Future;
031    
032    /**
033     * Brute force paged similarity search.
034     *
035     * <p>This class is an immutable similarity query engine implementation.</p>
036     *
037     * <p>
038     * Licensing: this class can be used with valid {@link LicenseGlobals#OVERLAP} license.</p>
039     *
040     * @param <D> Represented descriptor type
041     *
042     * @author Gabor Imre
043     */
044    public class BruteForcePagedSimilarity<D extends Descriptor> {
045    
046        /**
047         * Page storage.
048         *
049         * <p>All but the last page is expected to have the same size.
050         */
051        private final ImmutableList<ImmutableList<D>> pages;
052    
053        /**
054         * Descriptor comparator to use.
055         */
056        private final DescriptorComparator<D> comparator;
057    
058        /**
059         * Total number of descriptors stored.
060         */
061        private final int size;
062    
063        /**
064         * Construct new immutable reference.
065         *
066         * @param comparator Comparator to be used
067         * @param pages List of pages
068         * @param size Total number of descriptors
069         * @throws chemaxon.license.LicenseException when appropriate license is not available
070         */
071        private BruteForcePagedSimilarity(
072                DescriptorComparator<D> comparator, ImmutableList<ImmutableList<D>> pages, int size) {
073            LicenseHandler.getInstance().checkLicense(LicenseGlobals.OVERLAP);
074            this.comparator = comparator;
075            this.pages = pages;
076            this.size = size;
077        }
078    
079        /**
080         * Construct new immutable reference.
081         *
082         * @param comparator Comparator to be used
083         * @param pages List of descriptor pages. This parameter can not be <code>null</code>.
084         * @param page Additional descriptor page, considered as last page. Ignored if <code>null</code> given.
085         * @throws chemaxon.license.LicenseException when appropriate license is not available
086         */
087        public BruteForcePagedSimilarity(DescriptorComparator<D> comparator, List<ImmutableList<D>> pages, List<D> page) {
088            LicenseHandler.getInstance().checkLicense(LicenseGlobals.OVERLAP);
089            // remember comparator
090            this.comparator = comparator;
091    
092            // Create storage
093            if (page == null) {
094                this.pages = new ImmutableList.Builder<ImmutableList<D>>().addAll(pages).build();
095            } else {
096                this.pages = new ImmutableList.Builder<ImmutableList<D>>().
097                        addAll(pages).
098                        add(ImmutableList.copyOf(page)).build();
099            }
100    
101            int s = 0;
102            for (List<D> l : this.pages) {
103                s += l.size();
104            }
105            this.size = s;
106        }
107    
108        /**
109         * Construct another <code>BruteForcePagedSimilarity</code> instance representing a different comparator.
110         *
111         * <p>This operation is memory efficient, since page storage is immutable thus reusable.</p>
112         *
113         * @param comparator Comparator to be used
114         * @return           Instance representing comparison with the given comparator
115         */
116        public BruteForcePagedSimilarity<D> withComparator(DescriptorComparator<D> comparator) {
117            return new BruteForcePagedSimilarity(comparator, this.pages, this.size);
118        }
119    
120        /**
121         * Total number of descriptors stored.
122         *
123         * @return  Size
124         */
125        public int size() {
126            return this.size;
127        }
128    
129        /**
130         * Brute force find most similar structure for a set of structures.
131         *
132         * <p>This method utilizes concurrent execution and blocks until completion. ProgressObserver callback is invoked
133         * from the calling thread only.</p>
134         *
135         * @param queries Query descriptors
136         * @param po ProgressObserver to track progress
137         * @param ex ExecutorService to run workers
138         * @return List of most similar structures
139         */
140        public ImmutableList<SimilarityResultNode> findMostSimilar(
141                final List<D> queries, final SubProgressObserver po, final ExecutorService ex) {
142    
143    
144            po.switchToDeterminate((long) this.size * queries.size());
145            try {
146                final UnmodifiableIterator<ImmutableList<D>> pageIterator = this.pages.iterator();
147    
148                final DissimResult finalResult = new DissimResult(queries.size());
149    
150                final ExecutorCompletionService<SearchPage<D>> cs = new ExecutorCompletionService<SearchPage<D>>(ex);
151    
152                int pendingCt = 0;
153                int firstId = 0;
154                while (pendingCt > 0 || pageIterator.hasNext()) {
155                    if (pageIterator.hasNext()) {
156                        final List<D> page = pageIterator.next();
157                        cs.submit(new SearchPage(this.comparator, queries, page, firstId));
158                        firstId += page.size();
159                        pendingCt++;
160    
161                    }
162                    Future<SearchPage<D>> r;
163                    while ((r = cs.poll()) != null) {
164                        finalResult.merge(r.get());
165                        pendingCt--;
166                        po.worked(r.get().getPageSize() * r.get().getQueriesCount());
167                    }
168                }
169    
170                return finalResult.toList();
171            } catch (InterruptedException e) {
172                throw new IllegalStateException(e);
173            } catch (ExecutionException e) {
174                throw new IllegalStateException(e);
175            } finally {
176                po.done();
177            }
178    
179        }
180    
181    
182        static class DissimResult {
183    
184            protected final int [] bestids;
185    
186            protected final double [] bestdissims;
187    
188            public DissimResult(int size) {
189                this.bestids = new int[size];
190                this.bestdissims = new double[size];
191                Arrays.fill(bestids, -1);
192                Arrays.fill(bestdissims, Double.MAX_VALUE);
193            }
194    
195            public int[] getBestids() {
196                return this.bestids;
197            }
198    
199            public double[] getBestdissims() {
200                return this.bestdissims;
201            }
202    
203            public void merge(DissimResult another) {
204                for (int i = 0; i < this.bestids.length; i++) {
205                    if (another.bestdissims[i] < this.bestdissims[i]) {
206                        this.bestdissims[i] = another.bestdissims[i];
207                        this.bestids[i] = another.bestids[i];
208                    }
209                }
210            }
211    
212            public ImmutableList<SimilarityResultNode> toList() {
213                final ImmutableList.Builder<SimilarityResultNode> ret = new ImmutableList.Builder<SimilarityResultNode>();
214                for (int i = 0; i < this.bestids.length; i++) {
215                    ret.add(new SimilarityResultImpl(this.bestids[i], this.bestdissims[i]));
216                }
217                return ret.build();
218    
219            }
220    
221        }
222    
223        static class SearchPage<D extends Descriptor> extends DissimResult implements Callable<SearchPage<D>> {
224    
225            private final DescriptorComparator<D> comparator;
226    
227            private final List<D> page;
228    
229            private final List<D> queries;
230    
231    
232            final int firstId;
233    
234    
235            SearchPage(DescriptorComparator<D> comparator, List<D> queries, List<D> page, int firstId) {
236                super(queries.size());
237                this.page = page;
238                this.queries = queries;
239                this.comparator = comparator;
240                this.firstId = firstId;
241            }
242    
243            public int getPageSize() {
244                return this.page.size();
245            }
246    
247            public int getQueriesCount() {
248                return this.queries.size();
249            }
250    
251            @Override
252            public SearchPage<D> call() throws Exception {
253                int id = this.firstId;
254                for (D target : this.page) {
255                    for (int i = 0; i < this.bestids.length; i++) {
256                        final double d = this.comparator.calculateDissimilarity(target, this.queries.get(i));
257    
258                        if (this.bestdissims[i] > d) {
259                            this.bestdissims[i] = d;
260                            this.bestids[i] = id;
261                        }
262    
263                        id++;
264                    }
265                }
266    
267                return this;
268            }
269        }
270    
271    
272        /**
273         * Single threaded reference for multi query most similar lookup.
274         *
275         * @param queries   Queries to search
276         * @param po        Observer; will be closed upon finish/abort/error. One work unit is assigned to one comparison.
277         * @return          Results
278         * @throws CancellationException    When cancelled through supplied progress observer
279         */
280        public ImmutableList<SimilarityResultNode> findMostSimilarOnSingleThread(
281                final List<D> queries, final SubProgressObserver po) throws CancellationException {
282    
283            po.switchToDeterminate((long) this.size * queries.size());
284            try {
285    
286                int id = 0;
287    
288                final int [] bestids = new int[queries.size()];
289                final double [] bestdissims = new double[queries.size()];
290                Arrays.fill(bestdissims, Double.MAX_VALUE);
291    
292                for (List<D> page : this.pages) {
293                    for (D target : page) {
294                        for (int i = 0; i < bestids.length; i++) {
295    
296                            final double d = this.comparator.calculateDissimilarity(target, queries.get(i));
297    
298                            if (bestdissims[i] > d) {
299                                bestdissims[i] = d;
300                                bestids[i] = id;
301                            }
302    
303                            /*
304                            if (ret[i] == null || ret[i].getDissimiliarity() > d) {
305                                ret[i] = new SimilarityResultImpl(id, d);
306                            }
307                            */
308                        }
309                        id++;
310                        po.worked(bestids.length);
311                    }
312                }
313    
314                final ImmutableList.Builder<SimilarityResultNode> ret = new ImmutableList.Builder<SimilarityResultNode>();
315                for (int i = 0; i < bestids.length; i++) {
316                    ret.add(new SimilarityResultImpl(bestids[i], bestdissims[i]));
317                }
318                return ret.build();
319    
320                //final SimilarityResultNode[] ret = new SimilarityResultNode[queries.size()];
321                //return ImmutableList.copyOf(ret);
322            } finally {
323                po.done();
324            }
325        }
326    
327    
328        /**
329         * Find most similar for a given query structure.
330         *
331         * <p>This method blocks until ready and uses a single (the calling) thread to do the calculation.</p>
332         *
333         * @param query     Query descriptor
334         * @param po        Progress observer to track progress. Completion is reported by invoking
335         *                  {@link SubProgressObserver#done()} upon completion, cancellation or error
336         * @return          The most similar structure found
337         * @throws CancellationException upon cancellation through the given progress observer
338         */
339        public SimilarityResultNode findMostSimilarOnSingleThread(
340                final D query, final SubProgressObserver po) throws CancellationException {
341    
342            po.switchToDeterminate(this.size);
343            try {
344                SimilarityResultNode ret = null;
345                int id = 0;
346    
347                int bestid = -1;
348                double bestdissim = Double.MAX_VALUE;
349    
350                for (List<D> page : this.pages) {
351    
352    
353                    for (D target : page) {
354                        final double d = this.comparator.calculateDissimilarity(target, query);
355    
356                        if (d < bestdissim) {
357                            bestdissim = d;
358                            bestid = id;
359                        }
360    
361                        /*
362                        if (ret == null || ret.getDissimiliarity() > d) {
363                            ret = new SimilarityResultImpl(id, d);
364                        }
365                        */
366    
367                        id++;
368                        po.worked(1);
369                    }
370                }
371    
372                ret = new SimilarityResultImpl(bestid, bestdissim);
373                return ret;
374            } finally {
375                po.done();
376            }
377        }
378    
379    
380        /**
381         * Find most similars for a given query structure.
382         *
383         * <p>This method blocks until ready and uses a single (the calling) thread to do the calculation.</p>
384         *
385         * @param query     Query descriptor
386         * @param po        Progress observer to track progress. Completion is reported by invoking
387         *                  {@link SubProgressObserver#done()} upon completion, cancellation or error
388         * @param maxCount  Max results count
389         * @return          List of at most the given number of most similars, in increasing dissimilarity order
390         * @throws CancellationException upon cancellation through the given progress observer
391         */
392        public ImmutableList<SimilarityResultNode> findMostSimilarsOnSingleThread(
393                final D query, final SubProgressObserver po, final int maxCount) throws CancellationException {
394    
395            po.switchToDeterminate(this.size);
396    
397            try {
398                final List<SimilarityResultNode> results = new ArrayList<SimilarityResultNode>();
399    
400                int id = 0;
401    
402                for (List<D> page : this.pages) {
403                    for (D target : page) {
404                        final double d = this.comparator.calculateDissimilarity(target, query);
405    
406                        // Avoid accessing possible expensive list operation
407                        if (id == 0) {
408                            // Empty list, insert first
409                            results.add(new SimilarityResultImpl(id, d));
410                        } else if (id < maxCount || results.get(results.size() - 1).getDissimiliarity() > d) {
411                            // New item has place in results
412    
413                            // find the place for the next element
414                            for (int i = 0; i < results.size(); i++) {
415                                if (results.get(i).getDissimiliarity() > d) {
416                                    // d should be placed at position i
417                                    results.add(i, new SimilarityResultImpl(id, d));
418                                    break;
419                                }
420                            }
421                            if (results.size() > maxCount) {
422                                results.remove(results.size() - 1);
423                            }
424                        }
425                    }
426    
427                    id++;
428                    po.worked(1);
429                }
430                return ImmutableList.copyOf(results);
431            } finally {
432                po.done();
433            }
434        }
435    }