001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.overlap.io;
012    
013    import chemaxon.formats.MFileFormatUtil;
014    import chemaxon.struc.Molecule;
015    import com.chemaxon.calculations.common.SubProgressObserver;
016    import com.chemaxon.descriptors.common.Descriptor;
017    import com.google.common.base.Optional;
018    import java.io.InputStream;
019    import java.util.concurrent.ExecutorService;
020    
021    /**
022     * <code>Updater</code> allows the insertion of structures/descriptors into the similarity subsystem.
023     *
024     * <p>An updater allows a partially filled descriptor space in order to allow incremental/short updates.</p>
025     *
026     * @param <D> Represented descriptor
027     *
028     * @author Gabor Imre
029     */
030    public interface Updater<D extends Descriptor> {
031    
032        /**
033         * Read all molecules from a structure file into the similarity subsystem.
034         *
035         * <p>Consecutive members of a structure file have consecutive indexes associated. Usually the first molecule in
036         * the file has index value 0 associated. To allow segmented reading, this method can be called multiple times
037         * to append additional structures.</p>
038         *
039         * <p>Consistency considerations: the storage is left in a consistent state in case of the following abnormal or
040         * unexpected terminations:</p>
041         * <ul><li>Cancellation through the supplied {@link SubProgressObserver}</li>
042         * <li>Error thrown from the supplied {@link MoleculeCallback}</li>
043         * <li>Processing error from structures</li>
044         * <li>IO error from file reading (errors of this kind are re-thrown)</li>
045         * </ul>
046         *
047         * <p>Notes on multithreading:</p>
048         * <ul>
049         * <li>This method blocks until the execution completes, aborts or fails</li>
050         * <li>File reading and accessing callbacks (supplied progress observer, supplied moleculeCallback) are done on
051         * the calling thread.</li>
052         * <li>Molecule standardization and descriptor generation are executed using the supplied
053         * ExecutorService</li>
054         * </ul>
055         *
056         * TODO: shorten parameter list using a builder
057         *
058         * @param is                    Input stream to read from. Note that the stream is <b>not</b> closed when returning.
059         * @param opts                  Input options or <code>null</code> to pass to underlying
060         *                              {@link MFileFormatUtil#createRecordReader(java.io.InputStream, java.lang.String)}
061         * @param skipCount             Skip the given number of structures. Skipped structures are also reported to the
062         *                              given progress observer like ordinary processed structures, however they will not
063         *                              generate calls into the supplied {@link MoleculeCallback}.
064         * @param maxProcessCount       Read at most the given number of structures. Counting starts after the skipped structures.
065         * @param standardizer          Standardizer to apply on molecules. See {@link StandardizerWrappers} for utility
066         *                              methods. Note that the supplied wrapper must be thread safe.
067         * @param po                    ProgressObserver to track file read. Total reported work units are assigned to read
068         *                              and processed/skipped molecules count. The given observer <b>is closed</b> upon
069         *                              returning
070         * @param e                     ExecutorService to run descriptor generation for pages
071         * @param moleculeCallback      Callback to report back assigned indexes/processing errors.
072         * @throws IllegalArgumentException   When an IO error occurs during file reading or structure importing throws an
073         *                              exception
074         */
075        void addAll(
076                InputStream is,
077                String opts,
078                int skipCount,
079                int maxProcessCount,
080                StandardizerWrapper standardizer,
081                SubProgressObserver po,
082                ExecutorService e,
083                MoleculeCallback moleculeCallback);
084    
085        /**
086         * Add a single molecule to the similarity subsystem.
087         *
088         * <p>Note that the given molecule <b>must be standardized</b> before calling this method.</p>
089         *
090         * @param m     Molecule to be added
091         * @return      Associated index of the structure
092         * @throws      IllegalArgumentException in case of processing error. If an exception is thrown the storage is
093         *              kept in a consistent state.
094         */
095        int addMolecule(Molecule m);
096    
097        /**
098         * Add a single descriptor to the similarity subsystem.
099         *
100         * <p>Note that descriptors have a compatibility related API contract (currently references returned by
101         * {@link Descriptor#getDescriptorGenerator()} must be equal for compatible descriptors) which must be satisfied by
102         * the passed descriptor.</p>
103         *
104         * @param d     Descriptor to be added
105         * @return      Associated index of the represented structure
106         * @throws      IllegalArgumentException when the passed descriptor is not compatible. If an exception is thrown
107         *              the storage is kept in a consistent state
108         */
109        int addDescriptor(D d);
110    }