Added test for TODOs in Django. Removed n^2 scan for merging comments.
[idea/community.git] / platform / lang-impl / src / com / intellij / psi / impl / search / IndexPatternSearcher.java
1 /*
2  * Copyright 2000-2009 JetBrains s.r.o.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 package com.intellij.psi.impl.search;
18
19 import com.intellij.lang.Language;
20 import com.intellij.lang.LanguageParserDefinitions;
21 import com.intellij.lang.ParserDefinition;
22 import com.intellij.lexer.Lexer;
23 import com.intellij.openapi.extensions.Extensions;
24 import com.intellij.openapi.fileTypes.FileType;
25 import com.intellij.openapi.fileTypes.SyntaxHighlighter;
26 import com.intellij.openapi.fileTypes.SyntaxHighlighterFactory;
27 import com.intellij.openapi.fileTypes.impl.AbstractFileType;
28 import com.intellij.openapi.progress.ProgressManager;
29 import com.intellij.openapi.util.TextRange;
30 import com.intellij.openapi.vfs.VirtualFile;
31 import com.intellij.psi.*;
32 import com.intellij.psi.impl.PsiManagerEx;
33 import com.intellij.psi.impl.cache.CacheManager;
34 import com.intellij.psi.search.IndexPattern;
35 import com.intellij.psi.search.IndexPatternOccurrence;
36 import com.intellij.psi.search.IndexPatternProvider;
37 import com.intellij.psi.search.searches.IndexPatternSearch;
38 import com.intellij.psi.tree.IElementType;
39 import com.intellij.psi.tree.TokenSet;
40 import com.intellij.util.Processor;
41 import com.intellij.util.QueryExecutor;
42 import com.intellij.util.containers.ContainerUtil;
43 import com.intellij.util.text.CharSequenceSubSequence;
44 import gnu.trove.TIntArrayList;
45 import org.jetbrains.annotations.NotNull;
46
47 import java.util.Set;
48 import java.util.regex.Matcher;
49 import java.util.regex.Pattern;
50
51 /**
52  * @author yole
53  */
54 public class IndexPatternSearcher implements QueryExecutor<IndexPatternOccurrence, IndexPatternSearch.SearchParameters> {
55   public boolean execute(@NotNull final IndexPatternSearch.SearchParameters queryParameters,
56                          @NotNull final Processor<IndexPatternOccurrence> consumer) {
57     final PsiFile file = queryParameters.getFile();
58     VirtualFile virtualFile = file.getVirtualFile();
59     if (file instanceof PsiBinaryFile || file instanceof PsiCompiledElement || virtualFile == null) {
60       return true;
61     }
62
63     final CacheManager cacheManager = ((PsiManagerEx)file.getManager()).getCacheManager();
64     final IndexPatternProvider patternProvider = queryParameters.getPatternProvider();
65     int count = patternProvider != null
66                 ? cacheManager.getTodoCount(virtualFile, patternProvider)
67                 : cacheManager.getTodoCount(virtualFile, queryParameters.getPattern());
68     if (count == 0) return true;
69
70     return executeImpl(queryParameters, consumer);
71   }
72
73   protected boolean executeImpl(IndexPatternSearch.SearchParameters queryParameters,
74                               Processor<IndexPatternOccurrence> consumer) {
75     final IndexPatternProvider patternProvider = queryParameters.getPatternProvider();
76     final PsiFile file = queryParameters.getFile();
77     TIntArrayList commentStarts = new TIntArrayList();
78     TIntArrayList commentEnds = new TIntArrayList();
79
80     final CharSequence chars = file.getViewProvider().getContents();
81     findCommentTokenRanges(file, chars, queryParameters.getRange(), commentStarts, commentEnds);
82
83     for (int i = 0; i < commentStarts.size(); i++) {
84       int commentStart = commentStarts.get(i);
85       int commentEnd = commentEnds.get(i);
86
87       if (patternProvider != null) {
88         for (final IndexPattern pattern : patternProvider.getIndexPatterns()) {
89           if (!collectPatternMatches(pattern, chars, commentStart, commentEnd, file, queryParameters.getRange(), consumer)) {
90             return false;
91           }
92         }
93       }
94       else {
95         if (!collectPatternMatches(queryParameters.getPattern(), chars, commentStart, commentEnd, file, queryParameters.getRange(),
96                                    consumer)) {
97           return false;
98         }
99       }
100     }
101
102     return true;
103   }
104
105
106   private static final TokenSet COMMENT_TOKENS =
107     TokenSet.create(CustomHighlighterTokenType.LINE_COMMENT, CustomHighlighterTokenType.MULTI_LINE_COMMENT);
108
109   private static void findCommentTokenRanges(final PsiFile file,
110                                              final CharSequence chars,
111                                              final TextRange range,
112                                              final TIntArrayList commentStarts,
113                                              final TIntArrayList commentEnds) {
114     if (file instanceof PsiPlainTextFile) {
115       FileType fType = file.getFileType();
116       synchronized (PsiLock.LOCK) {
117         if (fType instanceof AbstractFileType) {
118           Lexer lexer = SyntaxHighlighter.PROVIDER.create(fType, file.getProject(), file.getVirtualFile()).getHighlightingLexer();
119           findComments(lexer, chars, range, COMMENT_TOKENS, commentStarts, commentEnds, null);
120         }
121         else {
122           commentStarts.add(0);
123           commentEnds.add(file.getTextLength());
124         }
125       }
126     }
127     else {
128       // collect comment offsets to prevent long locks by PsiManagerImpl.LOCK
129       synchronized (PsiLock.LOCK) {
130         final FileViewProvider viewProvider = file.getViewProvider();
131         final Set<Language> relevantLanguages = viewProvider.getLanguages();
132         for (Language lang : relevantLanguages) {
133           final TIntArrayList commentStartsList = new TIntArrayList();
134           final TIntArrayList commentEndsList = new TIntArrayList();
135
136           final SyntaxHighlighter syntaxHighlighter =
137             SyntaxHighlighterFactory.getSyntaxHighlighter(lang, file.getProject(), file.getVirtualFile());
138           Lexer lexer = syntaxHighlighter.getHighlightingLexer();
139           TokenSet commentTokens = null;
140           IndexPatternBuilder builderForFile = null;
141           for (IndexPatternBuilder builder : Extensions.getExtensions(IndexPatternBuilder.EP_NAME)) {
142             Lexer lexerFromBuilder = builder.getIndexingLexer(file);
143             if (lexerFromBuilder != null) {
144               lexer = lexerFromBuilder;
145               commentTokens = builder.getCommentTokenSet(file);
146               builderForFile = builder;
147             }
148           }
149           if (builderForFile == null) {
150             final ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(lang);
151             if (parserDefinition != null) {
152               commentTokens = parserDefinition.getCommentTokens();
153             }
154           }
155
156           if (commentTokens != null) {
157             findComments(lexer, chars, range, commentTokens, commentStartsList, commentEndsList, builderForFile);
158             mergeCommentLists(commentStarts, commentEnds, commentStartsList, commentEndsList);
159           }
160         }
161       }
162     }
163   }
164
165   private static void mergeCommentLists(TIntArrayList commentStarts,
166                                         TIntArrayList commentEnds,
167                                         TIntArrayList commentStartsList,
168                                         TIntArrayList commentEndsList) {
169     if (commentStarts.size() == 0 && commentEnds.size() == 0) {
170       commentStarts.add(commentStartsList.toNativeArray());
171       commentEnds.add(commentEndsList.toNativeArray());
172       return;
173     }
174
175     ContainerUtil.mergeSortedArrays(commentStarts, commentEnds, commentStartsList, commentEndsList);
176   }
177
178   private static void findComments(final Lexer lexer,
179                                    final CharSequence chars,
180                                    final TextRange range,
181                                    final TokenSet commentTokens,
182                                    final TIntArrayList commentStarts,
183                                    final TIntArrayList commentEnds,
184                                    final IndexPatternBuilder builderForFile) {
185     for (lexer.start(chars); ; lexer.advance()) {
186       IElementType tokenType = lexer.getTokenType();
187       if (tokenType == null) break;
188
189       if (range != null) {
190         if (lexer.getTokenEnd() <= range.getStartOffset()) continue;
191         if (lexer.getTokenStart() >= range.getEndOffset()) break;
192       }
193
194       boolean isComment = commentTokens.contains(tokenType);
195       if (!isComment) {
196         final Language commentLang = tokenType.getLanguage();
197         final ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(commentLang);
198         if (parserDefinition != null) {
199           final TokenSet langCommentTokens = parserDefinition.getCommentTokens();
200           isComment = langCommentTokens.contains(tokenType);
201         }
202       }
203
204       if (isComment) {
205         final int startDelta = builderForFile != null ? builderForFile.getCommentStartDelta(lexer.getTokenType()) : 0;
206         final int endDelta = builderForFile != null ? builderForFile.getCommentEndDelta(lexer.getTokenType()) : 0;
207
208         int start = lexer.getTokenStart() + startDelta;
209         int end = lexer.getTokenEnd() - endDelta;
210         assert start <= end : "Invalid comment range: " +
211                               new TextRange(start, end) +
212                               "; lexer token range=" +
213                               new TextRange(lexer.getTokenStart(), lexer.getTokenEnd()) +
214                               "; delta=" +
215                               new TextRange(startDelta, endDelta) +
216                               "; lexer=" +
217                               lexer +
218                               "; builder=" +
219                               builderForFile +
220                               "; chars length:" +
221                               chars.length();
222         assert end <= chars.length() : "Invalid comment end: " +
223                                        new TextRange(start, end) +
224                                        "; lexer token range=" +
225                                        new TextRange(lexer.getTokenStart(), lexer.getTokenEnd()) +
226                                        "; delta=" +
227                                        new TextRange(startDelta, endDelta) +
228                                        "; lexer=" +
229                                        lexer +
230                                        "; builder=" +
231                                        builderForFile +
232                                        "; chars length:" +
233                                        chars.length();
234         commentStarts.add(start);
235         commentEnds.add(end);
236       }
237     }
238   }
239
240   private static boolean collectPatternMatches(IndexPattern indexPattern,
241                                                CharSequence chars,
242                                                int commentStart,
243                                                int commentEnd,
244                                                PsiFile file,
245                                                TextRange range,
246                                                Processor<IndexPatternOccurrence> consumer) {
247     Pattern pattern = indexPattern.getPattern();
248     if (pattern != null) {
249       ProgressManager.checkCanceled();
250
251       CharSequence input = new CharSequenceSubSequence(chars, commentStart, commentEnd);
252       Matcher matcher = pattern.matcher(input);
253       while (true) {
254         //long time1 = System.currentTimeMillis();
255         boolean found = matcher.find();
256         //long time2 = System.currentTimeMillis();
257         //System.out.println("scanned text of length " + (lexer.getTokenEnd() - lexer.getTokenStart() + " in " + (time2 - time1) + " ms"));
258
259         if (!found) break;
260         int start = matcher.start() + commentStart;
261         int end = matcher.end() + commentStart;
262         if (start != end) {
263           if (range == null || range.getStartOffset() <= start && end <= range.getEndOffset()) {
264             if (!consumer.process(new IndexPatternOccurrenceImpl(file, start, end, indexPattern))) {
265               return false;
266             }
267           }
268         }
269
270         ProgressManager.checkCanceled();
271       }
272     }
273     return true;
274   }
275 }