|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object → com.wcohen.ss.AbstractStringDistance → com.wcohen.ss.AbstractTokenizedStringDistance → com.wcohen.ss.JensenShannonDistance
public abstract class JensenShannonDistance extends AbstractTokenizedStringDistance
Distance metrics based on Jensen-Shannon distance of two smoothed unigram language models.
Field Summary |
---|
Fields inherited from class com.wcohen.ss.AbstractTokenizedStringDistance |
---|
tokenizer |
Constructor Summary | |
---|---|
JensenShannonDistance()
|
|
JensenShannonDistance(Tokenizer tokenizer)
|
Method Summary | |
---|---|
protected double |
backgroundProb(Token tok)
Probability of the token in the background language model. |
java.lang.String |
explainScore(StringWrapper s,
StringWrapper t)
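Description inherited from AbstractStringDistance: this method needs to be implemented by subclasses. (JensenShannonDistance itself declares this method final.) |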
StringWrapper |
prepare(java.lang.String s)
Preprocess a string by finding tokens and giving them weights W such that W is the smoothed probability of the token appearing in the document. |
double |
score(StringWrapper s,
StringWrapper t)
Jensen-Shannon distance between distributions. |
protected abstract double |
smoothedProbability(Token tok,
double freq,
double totalWeight)
Smoothed probability of the token with frequency freq in a bag with the given totalWeight. |
void |
train(StringWrapperIterator i)
Accumulate statistics on how often each token occurs. |
Methods inherited from class com.wcohen.ss.AbstractTokenizedStringDistance |
---|
asBagOfTokens, prepare, setStringWrapperPool |
Methods inherited from class com.wcohen.ss.AbstractStringDistance |
---|
addExample, doMain, explainScore, getDistance, hasNextQuery, nextQuery, prepare, score, setDistanceInstancePool |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public JensenShannonDistance(Tokenizer tokenizer)
public JensenShannonDistance()
Method Detail |
---|
public final void train(StringWrapperIterator i)
Overrides: train in class AbstractTokenizedStringDistance
public final StringWrapper prepare(java.lang.String s)
Specified by: prepare in interface StringDistance
Overrides: prepare in class AbstractStringDistance
protected abstract double smoothedProbability(Token tok, double freq, double totalWeight)
protected double backgroundProb(Token tok)
public final double score(StringWrapper s, StringWrapper t)
Specified by: score in interface StringDistance
Overrides: score in class AbstractStringDistance
public final java.lang.String explainScore(StringWrapper s, StringWrapper t)
Description copied from class: AbstractStringDistance
Specified by: explainScore in interface StringDistance
Overrides: explainScore in class AbstractStringDistance
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |