001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    
011    package com.chemaxon.overlap.io;
012    
013    import chemaxon.formats.MFileFormatUtil;
014    import chemaxon.marvin.io.MRecord;
015    import chemaxon.marvin.io.MRecordReader;
016    import com.google.common.collect.AbstractIterator;
017    import java.io.Closeable;
018    import java.io.IOException;
019    import java.io.InputStream;
020    import java.util.Arrays;
021    import org.apache.commons.logging.Log;
022    import org.apache.commons.logging.LogFactory;
023    
024    /**
025     * Class encapsulating MRecordReader.
026     *
027     * <p>Closes underlying reader upon explicit close.</p>
028     *
029     * @author Gabor Imre
030     */
031    public class StructureRecordIterator extends AbstractIterator<StructureRecord> implements Closeable {
032    
033        /**
034         * Logger to use.
035         */
036        private final Log log = LogFactory.getLog(StructureRecordIterator.class);
037    
038        /**
039         * Encapsulated MRecordReader.
040         */
041        private final MRecordReader mrecordReader;
042    
043        /**
044         * Next read record number.
045         */
046        private int nextReadNo = 0;
047    
048        /**
049         * Recognized format.
050         *
051         * <p>Returned by {@link MRecordReader#getRecognizedFormat()}
052         */
053        private final String format;
054    
055    
056        /**
057         * Concat molecule name to molString from properties/field_0
058         * Workaround for IO-541
059         */
060        private final boolean concatMoleculeName;
061    
062        /**
063         * New instance.
064         *
065         * <p>
066         * Note that empty stream can not be wrapped. Otherwise {@link #getFormat()} would need to be ambiguous in case of
067         * empty stream.</p>
068         *
069         * @param is InputStream to read from. Note that the passed stream is read but not closed when end reached. Method
070         * {@link #close()} closes the underlying {@link MRecordReader}. Note that if the stream is empty
071         * {@link IllegalArgumentException} thrown.
072         * @param opts Options to pass to underlying
073         *                  {@link MFileFormatUtil#createRecordReader(java.io.InputStream, java.lang.String)}
074         *
075         * @throws IllegalArgumentException when opening was problematic due to exception from the underlying
076         *                                  {@link MRecordReader}.
077         */
078        public StructureRecordIterator(InputStream is, String opts) {
079            super();
080            if (this.log.isDebugEnabled()) {
081                this.log.debug("Construct, is: " + is.toString() + ", opts: " + opts);
082            }
083            try {
084                if (is.available() == 0) {
085                    throw new IllegalArgumentException("Empty stream cannot be read.");
086                }
087                this.mrecordReader = MFileFormatUtil.createRecordReader(is, opts);
088                this.format = this.mrecordReader.getRecognizedFormat();
089    
090                // IO-541 hack
091                //  -
092                if ("smiles".equals(this.format)
093                        || "cxsmiles".equals(this.format)
094                        || "smarts".equals(this.format)
095                        || "cxsmarts".equals(this.format))
096                {
097                    this.concatMoleculeName = true;
098    
099                } else {
100                    this.concatMoleculeName = false;
101                }
102    
103    
104                if (this.log.isDebugEnabled()) {
105                    this.log.debug("    Recognized format: " + this.format);
106                    this.log.debug("    Concat mol name:   " + this.concatMoleculeName);
107                }
108    
109            } catch (Exception ex) {
110                throw new IllegalArgumentException(ex);
111            }
112        }
113    
114        @Override
115        protected StructureRecord computeNext() {
116            if (this.log.isTraceEnabled()) {
117                this.log.trace("computeNext()");
118            }
119            final String molString;
120    
121    
122            final int currReadno = this.nextReadNo++;
123            try {
124                final MRecord rec = this.mrecordReader.nextRecord();
125                // Handle hasNext()
126                if (rec == null) {
127                    // no more records
128                    if (this.log.isTraceEnabled()) {
129                        this.log.trace("    No more records, report end of data");
130                    }
131                    return endOfData();
132                } else {
133                    if (this.log.isTraceEnabled()) {
134                        this.log.trace("    Prop keyss: " + Arrays.toString(rec.getPropertyContainer().getKeys()));
135                    }
136                }
137    
138                // Construct molString considering IO-531 and IO-541
139    
140                // Workaround for IO-531
141                final String msOrig = rec.getMolString() == null ? rec.getString() : rec.getMolString();
142    
143    
144                // Workaround for IO-541
145                // Be prepared for nulls
146                if (this.concatMoleculeName && msOrig != null
147                        && rec.getPropertyContainer() != null && rec.getPropertyContainer().get("field_0") != null) {
148    
149                    final String molNameToInsert = " " + rec.getPropertyContainer().get("field_0").getPropValue();
150                    // Insert molName before terminating '\n' character if present
151                    if (!msOrig.isEmpty() && msOrig.charAt(msOrig.length() - 1) == '\n') {
152                        // Insert before terminating '\n'
153                        molString = msOrig.substring(0, msOrig.length() - 1) + molNameToInsert + '\n';
154                    } else {
155                        // empty or non '\n' terminated
156                        molString = msOrig + molNameToInsert;
157                    }
158    
159    
160                } else {
161                    // no need to insert molname
162                    molString = msOrig;
163                }
164            } catch (Exception ex) {
165                // All problems are forwarded
166                throw new IllegalArgumentException(ex);
167            }
168    
169    
170            // WARNING! implicite reference to the enclosing instance is stored in anonym inner class!
171            final StructureRecord ret = new StructureRecordImpl(this.format, currReadno, molString);
172    
173            if (this.log.isTraceEnabled()) {
174                this.log.trace("    Returning " + ret.toString());
175            }
176    
177            return ret;
178        }
179    
180        @Override
181        public void close() throws IOException {
182            this.mrecordReader.close();
183        }
184    
185        /**
186         * Get format.
187         *
188         * @return Recognized format returned by {@link MRecordReader#getRecognizedFormat()}
189         */
190        public String getFormat() {
191            return this.format;
192        }
193    
194    }