001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one
003 *  or more contributor license agreements.  See the NOTICE file
004 *  distributed with this work for additional information
005 *  regarding copyright ownership.  The ASF licenses this file
006 *  to you under the Apache License, Version 2.0 (the
007 *  "License"); you may not use this file except in compliance
008 *  with the License.  You may obtain a copy of the License at
009 *  
010 *    http://www.apache.org/licenses/LICENSE-2.0
011 *  
012 *  Unless required by applicable law or agreed to in writing,
013 *  software distributed under the License is distributed on an
014 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 *  KIND, either express or implied.  See the License for the
016 *  specific language governing permissions and limitations
017 *  under the License. 
018 *  
019 */
020package org.apache.directory.server.xdbm.search.impl;
021
022
023import java.io.IOException;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Set;
027
028import org.apache.directory.api.ldap.model.constants.SchemaConstants;
029import org.apache.directory.api.ldap.model.cursor.Cursor;
030import org.apache.directory.api.ldap.model.exception.LdapException;
031import org.apache.directory.api.ldap.model.exception.LdapOtherException;
032import org.apache.directory.api.ldap.model.filter.AndNode;
033import org.apache.directory.api.ldap.model.filter.ApproximateNode;
034import org.apache.directory.api.ldap.model.filter.AssertionNode;
035import org.apache.directory.api.ldap.model.filter.BranchNode;
036import org.apache.directory.api.ldap.model.filter.EqualityNode;
037import org.apache.directory.api.ldap.model.filter.ExprNode;
038import org.apache.directory.api.ldap.model.filter.ExtensibleNode;
039import org.apache.directory.api.ldap.model.filter.GreaterEqNode;
040import org.apache.directory.api.ldap.model.filter.LeafNode;
041import org.apache.directory.api.ldap.model.filter.LessEqNode;
042import org.apache.directory.api.ldap.model.filter.NotNode;
043import org.apache.directory.api.ldap.model.filter.OrNode;
044import org.apache.directory.api.ldap.model.filter.PresenceNode;
045import org.apache.directory.api.ldap.model.filter.ScopeNode;
046import org.apache.directory.api.ldap.model.filter.SimpleNode;
047import org.apache.directory.api.ldap.model.filter.SubstringNode;
048import org.apache.directory.api.util.Strings;
049import org.apache.directory.server.core.api.partition.Partition;
050import org.apache.directory.server.core.api.partition.PartitionTxn;
051import org.apache.directory.server.i18n.I18n;
052import org.apache.directory.server.xdbm.Index;
053import org.apache.directory.server.xdbm.IndexNotFoundException;
054import org.apache.directory.server.xdbm.Store;
055import org.apache.directory.server.xdbm.search.Optimizer;
056
057
058/**
059 * Optimizer that annotates the filter using scan counts.
060 * 
061 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
062 */
063public class DefaultOptimizer implements Optimizer
064{
065    /* Package protected*/ static final String CANDIDATES_ANNOTATION_KEY = "candidates";
066    
067    /* Package protected*/ static final String COUNT_ANNOTATION = "count"; 
068
069    /** the database this optimizer operates on */
070    private final Store db;
071    private String contextEntryId;
072
073
074    /**
075     * Creates an optimizer on a database.
076     *
077     * @param db the database this optimizer works for.
078     */
079    public DefaultOptimizer( Store db )
080    {
081        this.db = db;
082    }
083
084
085    // This will suppress PMD.EmptyCatchBlock warnings in this method
086    @SuppressWarnings("PMD.EmptyCatchBlock")
087    private String getContextEntryId( PartitionTxn partitionTxn ) throws LdapException
088    {
089        if ( contextEntryId == null )
090        {
091            try
092            {
093                this.contextEntryId = db.getEntryId( partitionTxn, ( ( Partition ) db ).getSuffixDn() );
094            }
095            catch ( Exception e )
096            {
097                // might not have been created
098            }
099        }
100
101        if ( contextEntryId == null )
102        {
103            return Partition.DEFAULT_ID;
104        }
105
106        return contextEntryId;
107    }
108
109
110    /**
111     * Annotates the expression tree to determine optimal evaluation order based
112     * on the scan count for indices that exist for each expression node.  If an
113     * index on the attribute does not exist an IndexNotFoundException will be
114     * thrown.
115     *
116     * {@inheritDoc}
117     */
118    @Override
119    @SuppressWarnings("unchecked")
120    public Long annotate( PartitionTxn partitionTxn, ExprNode node ) throws LdapException
121    {
122        // Start off with the worst case unless scan count says otherwise.
123        Long count = Long.MAX_VALUE;
124
125        /* --------------------------------------------------------------------
126         *                 H A N D L E   L E A F   N O D E S          
127         * --------------------------------------------------------------------
128         * 
129         * Each leaf node is based on an attribute and it represents a condition
130         * that needs to be statisfied.  We ask the index (if one exists) for 
131         * the attribute to give us a scan count of all the candidates that 
132         * would satisfy the attribute assertion represented by the leaf node.
133         * 
134         * This is conducted differently based on the type of the leaf node.
135         * Comments on each node type explain how each scan count is arrived at.
136         */
137
138        if ( node instanceof ScopeNode )
139        {
140            count = getScopeScan( partitionTxn, ( ScopeNode ) node );
141        }
142        else if ( node instanceof AssertionNode )
143        {
144            /* 
145             * Leave it up to the assertion node to determine just how much it
146             * will cost us.  Anyway it defaults to a maximum scan count if a
147             * scan count is not specified by the implementation.
148             */
149        }
150        else if ( node.isLeaf() )
151        {
152            LeafNode leaf = ( LeafNode ) node;
153
154            try
155            {  
156                if ( node instanceof PresenceNode )
157                {
158                    count = getPresenceScan( partitionTxn, ( PresenceNode ) leaf );
159                }
160                else if ( node instanceof EqualityNode )
161                {
162                    count = getEqualityScan( partitionTxn, ( EqualityNode ) leaf );
163                }
164                else if ( node instanceof GreaterEqNode )
165                {
166                    count = getGreaterLessScan( partitionTxn, ( GreaterEqNode ) leaf, SimpleNode.EVAL_GREATER );
167                }
168                else if ( node instanceof LessEqNode )
169                {
170                    count = getGreaterLessScan( partitionTxn, ( SimpleNode ) leaf, SimpleNode.EVAL_LESSER );
171                }
172                else if ( node instanceof SubstringNode )
173                {
174                    /** Cannot really say so we presume the total index count */
175                    count = getSubstringScan( partitionTxn, ( SubstringNode ) leaf );
176                }
177                else if ( node instanceof ExtensibleNode )
178                {
179                    /** Cannot really say so we presume the total index count */
180                    count = getFullScan( partitionTxn, leaf );
181                }
182                else if ( node instanceof ApproximateNode )
183                {
184                    /** Feature not implemented so we just use equality matching */
185                    count = getEqualityScan( partitionTxn, ( ApproximateNode ) leaf );
186                }
187                else
188                {
189                    throw new IllegalArgumentException( I18n.err( I18n.ERR_711 ) );
190                }
191            }
192            catch ( IndexNotFoundException | IOException e )
193            {
194                throw new LdapOtherException( e.getMessage(), e );
195            }
196        }
197        // --------------------------------------------------------------------
198        //                 H A N D L E   B R A N C H   N O D E S       
199        // --------------------------------------------------------------------
200        else
201        {
202            if ( node instanceof AndNode )
203            {
204                count = getConjunctionScan( partitionTxn, ( AndNode ) node );
205            }
206            else if ( node instanceof OrNode )
207            {
208                count = getDisjunctionScan( partitionTxn, ( OrNode ) node );
209            }
210            else if ( node instanceof NotNode )
211            {
212                annotate( partitionTxn, ( ( NotNode ) node ).getFirstChild() );
213
214                /*
215                 * A negation filter is always worst case since we will have
216                 * to retrieve all entries from the master table then test
217                 * each one against the negated child filter.  There is no way
218                 * to use the indices.
219                 */
220                count = Long.MAX_VALUE;
221            }
222            else
223            {
224                throw new IllegalArgumentException( I18n.err( I18n.ERR_712 ) );
225            }
226        }
227
228        // Protect against overflow when counting.
229        if ( count < 0L )
230        {
231            count = Long.MAX_VALUE;
232        }
233
234        node.set( COUNT_ANNOTATION, count );
235
236        return count;
237    }
238
239
240    /**
241     * ANDs or Conjunctions take the count of the smallest child as their count.
242     * This is the best that a conjunction can do and should be used rather than
243     * the worst case. Notice that we annotate the child node with a recursive 
244     * call before accessing its count parameter making the chain recursion 
245     * depth first.
246     *
247     * @param node a AND (Conjunction) BranchNode
248     * @return the calculated scan count
249     * @throws Exception if there is an error
250     */
251    private long getConjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
252    {
253        long count = Long.MAX_VALUE;
254        List<ExprNode> children = node.getChildren();
255
256        for ( ExprNode child : children )
257        {
258            if ( ( count == 1 ) && ( child instanceof ScopeNode ) )
259            {
260                // We can stop here
261                break;
262            }
263
264            annotate( partitionTxn, child );
265            count = Math.min( ( ( Long ) child.get( COUNT_ANNOTATION ) ), count );
266
267            if ( count == 0 )
268            {
269                // No need to continue
270                break;
271            }
272        }
273
274        return count;
275    }
276
277
278    /**
279     * Disjunctions (OR) are the union of candidates across all subexpressions 
280     * so we add all the counts of the child nodes. Notice that we annotate the 
281     * child node with a recursive call.
282     *
283     * @param node the OR branch node
284     * @return the scan count on the OR node
285     * @throws Exception if there is an error
286     */
287    private long getDisjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
288    {
289        List<ExprNode> children = node.getChildren();
290        long total = 0L;
291
292        for ( ExprNode child : children )
293        {
294            annotate( partitionTxn, child );
295            total += ( Long ) child.get( COUNT_ANNOTATION );
296
297            if ( total == Long.MAX_VALUE )
298            {
299                // We can stop here withoit evaluating the following filters
300                break;
301            }
302        }
303
304        return total;
305    }
306
307
308    /**
309     * Gets the worst case scan count for all entries that satisfy the equality
310     * assertion in the SimpleNode argument.  
311     *
312     * @param node the node to get a scan count for 
313     * @return the worst case
314     * @throws Exception if there is an error accessing an index
315     */
316    @SuppressWarnings("unchecked")
317    private <V> long getEqualityScan( PartitionTxn partitionTxn, SimpleNode<V> node ) throws LdapException, IndexNotFoundException, IOException
318    {
319        if ( db.hasIndexOn( node.getAttributeType() ) )
320        {
321            Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
322
323            String normalizedKey;
324            
325            if ( node.getValue().isSchemaAware() )
326            {
327                normalizedKey = node.getValue().getNormalized();
328            }
329            else
330            {
331                normalizedKey = node.getAttributeType().getEquality().getNormalizer().normalize( node.getValue().getString() );
332            }
333            
334            Cursor<String> result = idx.forwardValueCursor( partitionTxn, ( V ) normalizedKey );
335            Set<String> values = new HashSet<>();
336            int nbFound = 0;
337
338            for ( String value : result )
339            {
340                values.add( value );
341                nbFound++;
342
343                // Arbitrary stop gathering the candidates if we have more than 100
344                if ( nbFound == 100 )
345                {
346                    break;
347                }
348            }
349
350            result.close();
351
352            if ( nbFound < 100 )
353            {
354                // Store the found candidates in the node
355                node.set( CANDIDATES_ANNOTATION_KEY, values );
356
357                return values.size();
358            }
359            else
360            {
361                // Reset the candidates annotation
362                node.set( CANDIDATES_ANNOTATION_KEY, null );
363
364                return idx.count( partitionTxn, ( V ) node.getValue().getNormalized() );
365            }
366        }
367
368        // count for non-indexed attribute is unknown so we presume da worst
369        return Long.MAX_VALUE;
370    }
371
372
373    /**
374     * Gets a scan count of the nodes that satisfy the greater or less than test
375     * specified by the node.
376     *
377     * @param node the greater or less than node to get a count for 
378     * @param isGreaterThan if true test is for >=, otherwise <=
379     * @return the scan count of all nodes satisfying the Ava
380     * @throws Exception if there is an error accessing an index
381     */
382    @SuppressWarnings("unchecked")
383    private <V> long getGreaterLessScan( PartitionTxn partitionTxn, SimpleNode<V> node, boolean isGreaterThan ) throws LdapException, IndexNotFoundException
384    {
385        if ( db.hasIndexOn( node.getAttributeType() ) )
386        {
387            Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
388
389            if ( isGreaterThan )
390            {
391                return idx.greaterThanCount( partitionTxn, ( V ) node.getValue().getString() );
392            }
393            else
394            {
395                return idx.lessThanCount( partitionTxn, ( V ) node.getValue().getString() );
396            }
397        }
398
399        // count for non-indexed attribute is unknown so we presume da worst
400        return Long.MAX_VALUE;
401    }
402
403
404    /**
405     * Get a scan count based on a Substring node : we will count the entries that are greater
406     * than ABC where the filter is (attr=ABC*). Any other filter won't be evaluated (for instance,
407     * a filter like (attr=*ABC) will resolve to a full scan atm - we could have created a reverted
408     * index for such a case -, and filters like (attr=*ABC*) also esolve to a full scan).
409     * 
410     * @param node The substring node
411     * @return The number of candidates
412     * @throws Exception If there is an error accessing an index
413     */
414    private long getSubstringScan( PartitionTxn partitionTxn, SubstringNode node ) throws LdapException, IndexNotFoundException
415    {
416        if ( db.hasIndexOn( node.getAttributeType() ) )
417        {
418            Index<String, String> idx = ( Index<String, String> ) db.getIndex( node.getAttributeType() );
419
420            String initial = node.getInitial();
421
422            if ( Strings.isEmpty( initial ) )
423            {
424                // Not a (attr=ABC*) filter : full index scan
425                return idx.count( partitionTxn );
426            }
427            else
428            {
429                return idx.greaterThanCount( partitionTxn, initial );
430            }
431        }
432        else
433        {
434            // count for non-indexed attribute is unknown so we presume da worst
435            return Long.MAX_VALUE;
436        }
437    }
438
439
440    /**
441     * Gets the total number of entries within the database index if one is 
442     * available otherwise the count of all the entries within the database is
443     * returned.
444     *
445     * @param node the leaf node to get a full scan count for 
446     * @return the worst case full scan count
447     * @throws Exception if there is an error access database indices
448     */
449    private long getFullScan( PartitionTxn partitionTxn, LeafNode node ) throws LdapException, IndexNotFoundException
450    {
451        if ( db.hasIndexOn( node.getAttributeType() ) )
452        {
453            Index<?, ?> idx = db.getIndex( node.getAttributeType() );
454            return idx.count( partitionTxn );
455        }
456
457        return Long.MAX_VALUE;
458    }
459
460
461    /**
462     * Gets the number of entries that would be returned by a presence node
463     * assertion.  Leverages the presence system index for scan counts.
464     *
465     * @param node the presence node
466     * @return the number of entries matched for the presence of an attribute
467     * @throws Exception if errors result
468     */
469    private long getPresenceScan( PartitionTxn partitionTxn, PresenceNode node ) throws LdapException
470    {
471        if ( db.hasUserIndexOn( node.getAttributeType() )
472             || node.getAttributeType().getOid().equals( SchemaConstants.ADMINISTRATIVE_ROLE_AT_OID ) )
473        {
474            Index<String, String> presenceIndex = db.getPresenceIndex();
475
476            return presenceIndex.count( partitionTxn, node.getAttributeType().getOid() );
477        }
478        else if ( db.hasSystemIndexOn( node.getAttributeType() )
479            || ( node.getAttributeType().getOid() == SchemaConstants.ENTRY_UUID_AT_OID ) )
480        {
481            // the system indices (objectClass, entryUUID and entryCSN) are maintained for
482            // each entry, so we could just return the database count
483            return db.count( partitionTxn );
484        }
485
486        return Long.MAX_VALUE;
487    }
488
489
490    /**
491     * Gets the scan count for the scope node attached to this filter.
492     *
493     * @param node the ScopeNode
494     * @return the scan count for scope
495     * @throws Exception if any errors result
496     */
497    private long getScopeScan( PartitionTxn partitionTxn, ScopeNode node ) throws LdapException
498    {
499        String id = node.getBaseId();
500
501        switch ( node.getScope() )
502        {
503            case OBJECT:
504                return 1L;
505
506            case ONELEVEL:
507                return db.getChildCount( partitionTxn, id );
508
509            case SUBTREE:
510                if ( id == getContextEntryId( partitionTxn ) )
511                {
512                    return db.count( partitionTxn );
513                }
514                else
515                {
516                    return db.getRdnIndex().reverseLookup( partitionTxn, id ).getNbDescendants() + 1L;
517                }
518
519            default:
520                throw new IllegalArgumentException( I18n.err( I18n.ERR_713 ) );
521        }
522    }
523}