diff --git a/typechecker/src/org/eclipse/ceylon/compiler/typechecker/analyzer/AnalyzerUtil.java b/typechecker/src/org/eclipse/ceylon/compiler/typechecker/analyzer/AnalyzerUtil.java index 1965f7268e7..6aa62d6d7af 100644 --- a/typechecker/src/org/eclipse/ceylon/compiler/typechecker/analyzer/AnalyzerUtil.java +++ b/typechecker/src/org/eclipse/ceylon/compiler/typechecker/analyzer/AnalyzerUtil.java @@ -79,7 +79,6 @@ */ public class AnalyzerUtil { -// static final JaroWinkler distance = new JaroWinkler(); static final NormalizedLevenshtein distance = new NormalizedLevenshtein(); diff --git a/typechecker/src/org/eclipse/ceylon/compiler/typechecker/util/JaroWinkler.java b/typechecker/src/org/eclipse/ceylon/compiler/typechecker/util/JaroWinkler.java deleted file mode 100644 index 45564a740ff..00000000000 --- a/typechecker/src/org/eclipse/ceylon/compiler/typechecker/util/JaroWinkler.java +++ /dev/null @@ -1,160 +0,0 @@ -/*Copyright 2015 Thibault Debatty. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is furnished -to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -//TAKEN FROM https://github.com/tdebatty/java-string-similarity - -package org.eclipse.ceylon.compiler.typechecker.util; - -import java.util.Arrays; - -/** - * The Jaro–Winkler distance metric is designed and best suited for short - * strings such as person names, and to detect typos; it is (roughly) a - * variation of Damerau-Levenshtein, where the substitution of 2 close - * characters is considered less important then the substitution of 2 characters - * that a far from each other. - * Jaro-Winkler was developed in the area of record linkage (duplicate - * detection) (Winkler, 1990). It returns a value in the interval [0.0, 1.0]. - * The distance is computed as 1 - Jaro-Winkler similarity. - * @author Thibault Debatty - */ -public class JaroWinkler { - - - public static void main(String[] args) { - JaroWinkler jw = new JaroWinkler(); - - System.out.println(jw.similarity("My string", "My string")); - - // substitution of s and t - System.out.println(jw.similarity("My string", "My tsring")); - - // substitution of s and n - System.out.println(jw.similarity("My string", "My ntrisg")); - } - - - public JaroWinkler() { - - } - - public JaroWinkler(double threshold) { - this.setThreshold(threshold); - } - - private double threshold = 0.7; - - /** - * Sets the threshold used to determine when Winkler bonus should be used. - * Set to a negative value to get the Jaro distance. - * Default value is 0.7 - * - * @param threshold the new value of the threshold - */ - public final void setThreshold(double threshold) { - this.threshold = threshold; - } - - /** - * Returns the current value of the threshold used for adding the Winkler - * bonus. The default value is 0.7. - * - * @return the current value of the threshold - */ - public double getThreshold() { - return threshold; - } - - public double similarity(String s1, String s2) { - int[] mtp = matches(s1, s2); - float m = mtp[0]; - if (m == 0) { - return 0f; - } - float j = ((m / s1.length() + m / s2.length() + (m - mtp[1]) / m)) / 3; - float jw = j < getThreshold() ? j : j + Math.min(0.1f, 1f / mtp[3]) * mtp[2] - * (1 - j); - return jw; - } - - - public double distance(String s1, String s2) { - return 1.0 - similarity(s1, s2); - } - - - private int[] matches(String s1, String s2) { - String max, min; - if (s1.length() > s2.length()) { - max = s1; - min = s2; - } else { - max = s2; - min = s1; - } - int range = Math.max(max.length() / 2 - 1, 0); - int[] matchIndexes = new int[min.length()]; - Arrays.fill(matchIndexes, -1); - boolean[] matchFlags = new boolean[max.length()]; - int matches = 0; - for (int mi = 0; mi < min.length(); mi++) { - char c1 = min.charAt(mi); - for (int xi = Math.max(mi - range, 0), - xn = Math.min(mi + range + 1, max.length()); xi < xn; xi++) { - if (!matchFlags[xi] && c1 == max.charAt(xi)) { - matchIndexes[mi] = xi; - matchFlags[xi] = true; - matches++; - break; - } - } - } - char[] ms1 = new char[matches]; - char[] ms2 = new char[matches]; - for (int i = 0, si = 0; i < min.length(); i++) { - if (matchIndexes[i] != -1) { - ms1[si] = min.charAt(i); - si++; - } - } - for (int i = 0, si = 0; i < max.length(); i++) { - if (matchFlags[i]) { - ms2[si] = max.charAt(i); - si++; - } - } - int transpositions = 0; - for (int mi = 0; mi < ms1.length; mi++) { - if (ms1[mi] != ms2[mi]) { - transpositions++; - } - } - int prefix = 0; - for (int mi = 0; mi < min.length(); mi++) { - if (s1.charAt(mi) == s2.charAt(mi)) { - prefix++; - } else { - break; - } - } - return new int[]{matches, transpositions / 2, prefix, max.length()}; - } -}