001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    package com.chemaxon.overlap;
011    
012    import chemaxon.struc.Molecule;
013    import com.chemaxon.calculations.common.SubProgressObserver;
014    import com.chemaxon.descriptors.common.Descriptor;
015    import com.chemaxon.descriptors.common.DescriptorComparator;
016    import com.chemaxon.descriptors.common.DescriptorGenerator;
017    import com.chemaxon.overlap.bruteforce.UnguardedPagedOverlap;
018    import com.chemaxon.overlap.io.StandardizerWrapper;
019    import com.chemaxon.overlap.io.StandardizerWrappers;
020    import com.chemaxon.overlap.storage.PagedDescriptorStorage;
021    import com.chemaxon.overlap.unguarded.UnguardedDissimilarityCalculator;
022    import com.chemaxon.overlap.unguarded.Unguardeds;
023    import com.google.common.base.Function;
024    import com.google.common.base.Functions;
025    import com.google.common.base.Preconditions;
026    import com.google.common.collect.ImmutableList;
027    import java.io.IOException;
028    import java.io.ObjectInputStream;
029    import java.io.Serializable;
030    import java.util.List;
031    import java.util.concurrent.ExecutorService;
032    
033    /**
034     * Context for an overlap analysis.
035     *
036     * <p>This context collects typical domain specific parameters required for an overlap analysis setup and execution.
037     * Some parameters required for setup are considered to be execution environment specific, like the used
038     * {@link ExecutorService}, {@link SubProgressObserver} instances, file references, etc which are not covered by this
039     * context.</p>
040     *
041     * <p>Instances of this cumulative factory class are immutable.</p>
042     *
043     * <p>At least a {@link DescriptorGenerator} have to be specified; this is enforced through initial factory method
044     * {@link #initial(com.chemaxon.descriptors.common.DescriptorGenerator)}. Further members defaults:
045     * <ul><li>standardizer: {@link StandardizerWrappers#identityStandardizer()}</li>
046     * <li>comparator: {@link DescriptorGenerator#getDefaultComparator()}</li>
047     * <li>pagesize: {@link #DEFAULT_PAGESIZE}</li>
048     * <li>extractor: {@link Functions#identity()}</li>
049     * <li>unguarded calculator: {@link Unguardeds#wrap(com.chemaxon.descriptors.common.DescriptorComparator)} (of the
050     * represented comparator</li>
051     * </ul></p>
052     *
053     * <p>Since overlap analysis represents only internal identifiers and unguarded descritptor forms, associations to
054     * original structures, IDs, etc are outside the scope of this context.</p>
055     *
056     * <p><b>IMPORTANT EFFICIENCY CONSIDERATIONS</b>: This context holds reference to {@link DescriptorGenerator} and
057     * {@link StandardizerWrapper} instances which might ensure thread safety using {@link ThreadLocal} members. Overlap
058     * analysis and associated functionality utilize {@link ExecutorService} passed from outside. The possible pooling of
059     * worker threads by the executing environment might interfere with the thread local storage causing memory leak-like
060     * situations. To mitigate this risk following these practices are recommended:
061     * <ul><li><b>Never</b> any {@link OverlapAnalysisContext}, {@link DescriptorGenerator} or {@link StandardizerWrapper}
062     * in a <code>static</code> context</li>
063     * <li><b>Periodically</b> make any instance of the above references unreachable. It is ok to keep the reference
064     * to {@link UnguardedPagedSimilarity} instances.</li>
065     * <li><b>Benchmark</b> the feasibility of deserialization before every search; keeping the memory always allocated
066     * might not provide significant performance gain.</li>
067     * </ul>
068     *
069     * @param <D> Underlying descriptor type
070     * @param <T> Unguarded descriptor type
071     *
072     * TODO: make context {@link Serializable}; prerequisite is to make generators serializable
073     *
074     * @see <a href="http://wiki.apidesign.org/wiki/APIDesignPatterns:CumulativeFactory">http://wiki.apidesign.org/wiki/APIDesignPatterns:CumulativeFactory</a>
075     *
076     * @author Gabor Imre
077     */
078    public final class OverlapAnalysisContext<D extends Descriptor, T extends Serializable> implements Serializable {
079    
080        /**
081         * Serial version.
082         */
083        private static final long serialVersionUID = 0;
084    
085        /**
086         * Default pagesize.
087         */
088        public static final int DEFAULT_PAGESIZE = 50;
089    
090        /**
091         * Represented Standardizer which can not be <code>null</code>.
092         */
093        private final StandardizerWrapper standardizer;
094    
095        /**
096         * Represented descriptor generator which can not be <code>null</code>.
097         */
098        private final DescriptorGenerator<D> generator;
099    
100        /**
101         * Represented descriptor comparator which can not be <code>null</code>.
102         */
103        private final DescriptorComparator<D> comparator;
104    
105        /**
106         * Represented page size.
107         */
108        private final int pagesize;
109    
110        /**
111         * Extractor function which can not be <code>null</code>.
112         */
113        private final Function<D, T> unguardedFormExtractor;
114    
115        /**
116         * Unguarded dissimilarity calculation used for searches which can not be <code>null</code>.
117         */
118        private final UnguardedDissimilarityCalculator<T> unguardedDissimCalc;
119    
120        /**
121         * Constructor.
122         *
123         * @param standardizer  standardizer
124         * @param generator     generator
125         * @param comparator    comparator
126         * @param pagesize      pagesize
127         * @param extractor     extractor
128         * @param unguardedCalc unguarded calculator
129         */
130        private OverlapAnalysisContext(
131                StandardizerWrapper standardizer,
132                DescriptorGenerator<D> generator,
133                DescriptorComparator<D> comparator,
134                int pagesize,
135                Function<D, T> extractor,
136                UnguardedDissimilarityCalculator<T> unguardedCalc) {
137    
138            // Check arguments
139            Preconditions.checkNotNull(standardizer);
140            Preconditions.checkNotNull(generator);
141            Preconditions.checkNotNull(comparator);
142            Preconditions.checkNotNull(extractor);
143            Preconditions.checkNotNull(unguardedCalc);
144            if (pagesize <= 0) {
145                throw new IllegalArgumentException("Illegal page size: " + pagesize);
146            }
147    
148            this.standardizer = standardizer;
149            this.generator = generator;
150            this.comparator = comparator;
151            this.pagesize = pagesize;
152            this.unguardedFormExtractor = extractor;
153            this.unguardedDissimCalc = unguardedCalc;
154        }
155    
156        /**
157         * Empty representation with specified DescriptorGenerator.
158         *
159         * <p>Note that {@link DescriptorGenerator} determines underlying {@link Descriptor} type thus it is not
160         * modifiable. Also, when calling this method the represented {@link DescriptorComparator} defaults to the
161         * comparator returned by {@link DescriptorGenerator#getDefaultComparator()}.</p>
162         *
163         * @param   <D>         Underlying descriptor type
164         * @param   generator   DescriptorGenerator represented
165         * @return  Generator set; pagesize set to the {@link #DEFAULT_PAGESIZE}; others initialized to defaults
166         */
167        public static <D extends Descriptor> OverlapAnalysisContext<D, byte[]> initial(DescriptorGenerator<D> generator) {
168            final DescriptorComparator<D> c = generator.getDefaultComparator();
169            return new OverlapAnalysisContext(
170                    StandardizerWrappers.identityStandardizer(),
171                    generator,
172                    c,
173                    DEFAULT_PAGESIZE,
174                    Unguardeds.wrapGenerator(generator), Unguardeds.wrapComparator(c));
175        }
176    
177        /**
178         * Empty representation with specified DescriptorGenerator and comparator.
179         *
180         * <p>Note that {@link DescriptorGenerator} determines underlying {@link Descriptor} type thus it is not
181         * modifiable.</p>
182         *
183         * @param   <D>         Underlying descriptor type
184         * @param   generator   DescriptorGenerator represented
185         * @param   comparator  DescriptorComparator represented
186         * @return  Generator and comparatorset; pagesize set to the {@link #DEFAULT_PAGESIZE}; others initialized to their
187         *          defaults
188         */
189        public static <D extends Descriptor> OverlapAnalysisContext<D, byte[]> initial(
190                DescriptorGenerator<D> generator, DescriptorComparator<D> comparator) {
191            return new OverlapAnalysisContext(
192                    StandardizerWrappers.identityStandardizer(),
193                    generator,
194                    comparator,
195                    DEFAULT_PAGESIZE,
196                    Unguardeds.wrapGenerator(generator), Unguardeds.wrapComparator(comparator));
197        }
198    
199    
200        /**
201         * Update pagesize.
202         *
203         * @param pagesize New pagesize
204         * @return         New instance holding updated parameter
205         */
206        public OverlapAnalysisContext<D, T> pagesize(int pagesize) {
207            if (pagesize <= 0) {
208                throw new IllegalArgumentException("Invalid pagesize " + pagesize);
209            }
210            return new OverlapAnalysisContext(
211                    this.standardizer,
212                    this.generator,
213                    this.comparator,
214                    pagesize,
215                    this.unguardedFormExtractor,
216                    this.unguardedDissimCalc);
217        }
218    
219        /**
220         * Update standardizer.
221         *
222         * @param standardizer New standardizer
223         * @return             New instance holding updated parameter
224         */
225        public OverlapAnalysisContext<D, T> standardizer(StandardizerWrapper standardizer) {
226            Preconditions.checkNotNull(standardizer);
227            return new OverlapAnalysisContext<D, T>(
228                    standardizer,
229                    this.generator,
230                    this.comparator,
231                    this.pagesize,
232                    this.unguardedFormExtractor,
233                    this.unguardedDissimCalc);
234        }
235    
236        /**
237         * Update represented unguarded layer.
238         *
239         * @param <N>             New unguarded representation type
240         * @param extractor       Extractor of unguarded representation
241         * @param unguardedCalc   Unguarded comparator
242         * @return                New instance holding updated parameter
243         */
244        public <N extends Serializable> OverlapAnalysisContext<D, N> unguarded(
245                Function<D, N> extractor, UnguardedDissimilarityCalculator<N> unguardedCalc) {
246            Preconditions.checkNotNull(extractor);
247            Preconditions.checkNotNull(unguardedCalc);
248    
249            return new OverlapAnalysisContext(
250                    this.standardizer,
251                    this.generator,
252                    this.comparator,
253                    this.pagesize,
254                    extractor,
255                    unguardedCalc);
256        }
257    
258        /**
259         * Gets represented descriptor generator.
260         *
261         * @return  Represented generator
262         */
263        public DescriptorGenerator<D> getDescriptorGenerator() {
264            return this.generator;
265        }
266    
267        /**
268         * Gets represented descriptor comparator.
269         *
270         * @return  Represented comparator
271         */
272        public DescriptorComparator<D> getDescriptorComparator() {
273            return this.comparator;
274        }
275    
276        /**
277         * Gets represented standardizer wrapper.
278         *
279         * @return  Represented standardizer
280         */
281        public StandardizerWrapper getStandardizer() {
282            return this.standardizer;
283        }
284    
285        /**
286         * Gets represented unguarded form extractor.
287         *
288         * @return  Represented extractot function
289         */
290        public Function<D, T> getUnguardedExtractor() {
291            return this.unguardedFormExtractor;
292        }
293    
294        /**
295         * Gets represented page size.
296         *
297         * @return  Represented page size
298         */
299        public int getPagesize() {
300            return this.pagesize;
301        }
302    
303    
304        /**
305         * Gets represented unguarded dissimilarty calculator.
306         *
307         * @return  Represented ungarded dissimilarity calculator
308         */
309        public UnguardedDissimilarityCalculator<T> getUnguardedDissimilarityCalculator() {
310            return this.unguardedDissimCalc;
311        }
312    
313        /**
314         * Create an empty storage.
315         *
316         * <p>Note that the returned storage has no reference to the serializer of this context.</p>
317         *
318         * @return Empty storage
319         */
320        public PagedDescriptorStorage<D> createEmptyStorage() {
321            return new PagedDescriptorStorage<D>(this.pagesize, this.generator);
322        }
323    
324        // Direct file Serializer!
325    
326        /**
327         * Deserialize an {@link UnguardedPagedSimilarity} from binary serialized from.
328         *
329         * <p>This method delegates call to
330         * {@link PagedDescriptorStorage#deserializeUnguarded(int, com.chemaxon.descriptors.common.DescriptorGenerator, com.google.common.base.Function, com.chemaxon.overlap.unguarded.UnguardedDissimilarityCalculator, java.io.ObjectInputStream, com.chemaxon.calculations.common.SubProgressObserver)}
331         * passing represented context information.</p>
332         *
333         * @param ois   Source stream
334         * @param po    Progress observer to track progress
335         * @return      Deserialized searcher
336         * @throws IOException              Propagated from deserialization
337         * @throws ClassNotFoundException   Propagated from deserialization
338         */
339        public UnguardedPagedOverlap<T> deserializeUnguardedPagedOverlap(
340                ObjectInputStream ois,
341                SubProgressObserver po) throws IOException, ClassNotFoundException {
342            return PagedDescriptorStorage.deserializeUnguarded(
343                    this.pagesize,
344                    this.generator,
345                    this.unguardedFormExtractor,
346                    this.unguardedDissimCalc,
347                    ois,
348                    po);
349        }
350    
351        /**
352         * Transform molecules according to the represented context.
353         *
354         * <p>For each molecule the following steps are executed:
355         * <ul><li>Cloning the molecule for further steps using {@link Molecule#cloneMolecule()}</li>
356         * <li>Standardize with the represented {@link StandardizerWrapper} using
357         * {@link StandardizerWrapper#standardize(chemaxon.struc.Molecule)}</li>
358         * <li>Generate descriptor with the represented {@link DescriptorGenerator} using
359         * {@link DescriptorGenerator#generateDescriptor(chemaxon.struc.Molecule)}</li>
360         * <li>Retrieve unguarded descriptor contents with the represented retriever {@link Function}</li></ul>
361         *
362         * @param molecules Molecules to transform. The molecules are cloned using {@link Molecule#cloneMolecule()} before
363         * transformations.
364         * @param po        Progress observer to track progress
365         * @return          List of unguarded descriptors suitable to pass to searchers.
366         */
367        public ImmutableList<T> transformMolecules(List<Molecule> molecules, SubProgressObserver po) {
368            po.switchToDeterminate(molecules.size());
369            try {
370                final ImmutableList.Builder<T> retb = new ImmutableList.Builder<T>();
371                for (Molecule m : molecules) {
372                    final Molecule mc = m.cloneMolecule();
373                    this.standardizer.standardize(mc);
374                    final D d = this.generator.generateDescriptor(mc);
375                    final T t = this.unguardedFormExtractor.apply(d);
376                    retb.add(t);
377                    po.worked(1);
378                }
379                return retb.build();
380            } finally {
381                po.done();
382            }
383        }
384    
385        /**
386         * Create String representation.
387         *
388         * @param multiline Create multiline (more readable) output.
389         *
390         * @return String representation
391         */
392        public String toString(boolean multiline) {
393            if (multiline) {
394                return "Overlap analysis context.\n"
395                        + "    Pagesize:       " + this.pagesize + "\n"
396                        + "    Standardizer:   " + this.standardizer + "\n"
397                        + "    Generator:      " + this.generator + "\n"
398                        + "    Comparator:     " + this.comparator + "\n"
399                        + "    Extractor:      " + this.unguardedFormExtractor + "\n"
400                        + "    Unguarded calc: " + this.unguardedDissimCalc + "\n";
401            } else {
402                return "Overlap analysis context."
403                        + " Pagesize: " + this.pagesize
404                        + " Standardizer: " + this.standardizer
405                        + " Generator: " + this.generator
406                        + " Comparator: " + this.comparator
407                        + " Extractor: " + this.unguardedFormExtractor
408                        + " Unguarded calc: " + this.unguardedDissimCalc;
409    
410            }
411        }
412    
413        @Override
414        public String toString() {
415            return toString(false);
416        }
417    }