1 package com.intellij.stats.ngram
3 import com.intellij.psi.PsiElement
4 import com.intellij.psi.PsiFile
5 import com.intellij.util.io.DataInputOutputUtil
6 import com.intellij.util.io.KeyDescriptor
7 import java.io.DataInput
8 import java.io.DataOutput
/**
 * An n-gram: an ordered list of string tokens held in [elements].
 *
 * Being a data class, equality and hash code are derived from [elements].
 */
10 data class NGram(val elements: List<String>) {
/**
 * Returns a new [NGram] whose elements are this n-gram's elements followed by [token].
 *
 * This instance is left unchanged.
 *
 * @param token the token to place at the end of the new n-gram
 * @return a new n-gram one element longer than this one
 */
fun append(token: String): NGram {
    // `plus` builds the extended list directly, without the intermediate array + spread copy.
    return NGram(elements + token)
}
/**
 * Returns a new [NGram] containing every element of this n-gram except the first.
 *
 * An empty n-gram yields an empty n-gram instead of failing. The result holds its own
 * list rather than a view backed by this n-gram's list.
 *
 * @return a new n-gram without the head element
 */
fun dropHead(): NGram {
    // drop(1) is safe on an empty list and copies, so the parent list is not retained.
    return NGram(elements.drop(1))
}
// Empty n-gram used as the "invalid" marker; getNGramForElement returns it when the
// element has no containing file.
21 val INVALID = NGram(emptyList())
// Builds a map from NGram to occurrence count for the elements of psiFile.
// `content` is only forwarded to NGramElementProvider.shouldIndex.
// NOTE(review): this view of the function has gaps — the declaration(s) of `j`, the guard
// that must precede `oldValue + 1` (putIfAbsent returns a nullable), and the return
// statement are not visible here. Confirm against the full file before changing anything.
25 fun processFile(psiFile: PsiFile, content: CharSequence): HashMap<NGram, Int> {
26 val result = HashMap<NGram, Int>()
27 val elements = TreeTraversal.getElements(psiFile)
// Nothing is indexed unless the file has more than N elements.
28 if (elements.size > N) {
// Start at N so that elements[i - j] has predecessors to look back at.
29 for (i in N until elements.size) {
30 if (NGramElementProvider.shouldIndex(elements[i], content)) {
31 val nGramElements = ArrayList<String>()
// Preceding elements contribute their provider representation
// (presumably inside a loop over `j` — loop header not visible, TODO confirm range/order).
33 nGramElements.add(NGramElementProvider.getElementRepresentation(elements[i - j]))
// The indexed element itself contributes its raw text, appended last.
35 nGramElements.add(elements[i].text)
// Suffix of the collected tokens starting at offset j (presumably a second loop over `j` — TODO confirm).
37 val ngram = NGram(nGramElements.subList(j, nGramElements.size))
// First occurrence is stored as 1; putIfAbsent returns the previous count otherwise.
38 val oldValue = result.putIfAbsent(ngram, 1)
// Increment the existing count.
40 result.put(ngram, oldValue + 1)
// Builds an NGram from the element types of the NGram.N elements that precede
// element.parent.node in the traversal of the element's containing file, nearest first.
// Returns NGram.INVALID when the element has no containing file.
// NOTE(review): the lines between the index lookup and the loop are not visible here.
// `elements[index - i]` would go negative if indexOf returns -1 or index < NGram.N —
// presumably guarded in the hidden lines; confirm.
49 fun getNGramForElement(element: PsiElement) : NGram {
50 val elements = TreeTraversal.getElements(element.containingFile ?: return NGram.INVALID)
// Position of the element's parent node within the traversal.
51 val index = elements.indexOf(element.parent.node)
55 val nGramElements = ArrayList<String>()
56 for (i in 1..NGram.N) {
57 nGramElements.add(elements[index - i].elementType.toString())
59 return NGram(nGramElements)
/** Writes [x] to this [DataOutput] by delegating to [DataInputOutputUtil.writeINT]. */
fun DataOutput.writeINT(x: Int) {
    DataInputOutputUtil.writeINT(this, x)
}
/** Reads an integer from this [DataInput] by delegating to [DataInputOutputUtil.readINT]. */
fun DataInput.readINT(): Int {
    return DataInputOutputUtil.readINT(this)
}
/**
 * [KeyDescriptor] implementation for [NGram] keys.
 *
 * `save` and `read` encode an n-gram as its element count followed by one integer per
 * element, where the integers come from `NGramEnumeratingService`.
 */
67 object NGramKeyDescriptor: KeyDescriptor<NGram> {
/**
 * Writes [nGram] to [out]: first the number of elements, then, for each element in order,
 * the integer returned by `NGramEnumeratingService.enumerateString`.
 *
 * @param out destination of the encoded key
 * @param nGram the key to write; must not be null (a null key fails with a NullPointerException)
 */
override fun save(out: DataOutput, nGram: NGram?) {
    val enumerator = NGramEnumeratingService.getInstance()
    val tokens = nGram!!.elements
    out.writeINT(tokens.size)
    for (token in tokens) {
        out.writeINT(enumerator.enumerateString(token))
    }
}
/**
 * Reads an [NGram] from [in]: an element count followed by that many integers, each
 * translated back to a string through `NGramEnumeratingService.valueOf`.
 *
 * @param in source of the encoded key
 * @return the decoded n-gram, with elements in the order they were read
 */
override fun read(`in`: DataInput): NGram {
    val enumerator = NGramEnumeratingService.getInstance()
    val count = `in`.readINT()
    val tokens = ArrayList<String>()
    // A non-positive count yields an empty n-gram, as with iterating the range 1..count.
    repeat(count) {
        tokens.add(enumerator.valueOf(`in`.readINT()))
    }
    return NGram(tokens)
}
// Override of the key-equality hook for two (nullable) NGram keys.
// NOTE(review): the body is not visible in this view — confirm it is consistent with
// getHashCode, which uses NGram.hashCode().
80 override fun isEqual(p0: NGram?, p1: NGram?): Boolean {
/**
 * Returns the hash code of the given key, i.e. the data-class hash of [NGram].
 *
 * @param p0 the key to hash; must not be null (a null key fails with a NullPointerException)
 */
override fun getHashCode(p0: NGram?): Int = p0!!.hashCode()