|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object com.wcohen.ss.AbstractStringDistance com.wcohen.ss.AbstractTokenizedStringDistance com.wcohen.ss.AbstractStatisticalTokenDistance com.wcohen.ss.TFIDF
public class TFIDF
TFIDF-based distance metric.
Nested Class Summary | |
---|---|
protected class |
TFIDF.UnitVector
Marker class extending BagOfTokens |
Field Summary |
---|
Fields inherited from class com.wcohen.ss.AbstractStatisticalTokenDistance |
---|
collectionSize, documentFrequency, totalTokenCount |
Fields inherited from class com.wcohen.ss.AbstractTokenizedStringDistance |
---|
tokenizer |
Constructor Summary | |
---|---|
TFIDF()
|
|
TFIDF(Tokenizer tokenizer)
|
Method Summary | |
---|---|
protected TFIDF.UnitVector |
asUnitVector(StringWrapper w)
|
java.lang.String |
explainScore(StringWrapper s,
StringWrapper t)
Explain how the distance was computed. |
int |
getCollectionSize()
|
int |
getDocumentFrequency(Token token)
Get the document frequency of the token. |
Token[] |
getTokens()
Access the tokens of the last prepare()-ed string. |
double |
getWeight(Token token)
Access the weight of a token in the vector created for the last prepare()-ed string. |
static void |
main(java.lang.String[] argv)
|
StringWrapper |
prepare(java.lang.String s)
Preprocess a string by finding tokens and giving them TFIDF weights. |
double |
score(StringWrapper s,
StringWrapper t)
This method needs to be implemented by subclasses. |
void |
setCollectionSize(int n)
Set the size of the collection that this TFIDF measure was trained on to some value. |
void |
setDocumentFrequency(Token token,
int df)
Set the document frequency of the token to some value. |
java.lang.String |
toString()
|
Methods inherited from class com.wcohen.ss.AbstractStatisticalTokenDistance |
---|
checkTrainingHasHappened, train |
Methods inherited from class com.wcohen.ss.AbstractTokenizedStringDistance |
---|
asBagOfTokens, prepare, setStringWrapperPool |
Methods inherited from class com.wcohen.ss.AbstractStringDistance |
---|
addExample, doMain, explainScore, getDistance, hasNextQuery, nextQuery, prepare, score, setDistanceInstancePool |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Constructor Detail |
---|
public TFIDF(Tokenizer tokenizer)
public TFIDF()
Method Detail |
---|
public double score(StringWrapper s, StringWrapper t)
Description copied from class: AbstractStringDistance
score
in interface StringDistance
score
in class AbstractStringDistance
protected TFIDF.UnitVector asUnitVector(StringWrapper w)
public StringWrapper prepare(java.lang.String s)
prepare
in interface StringDistance
prepare
in class AbstractStringDistance
public Token[] getTokens()
public double getWeight(Token token)
public int getDocumentFrequency(Token token)
getDocumentFrequency
in class AbstractStatisticalTokenDistance
public void setDocumentFrequency(Token token, int df)
public int getCollectionSize()
public void setCollectionSize(int n)
public java.lang.String explainScore(StringWrapper s, StringWrapper t)
explainScore
in interface StringDistance
explainScore
in class AbstractStringDistance
public java.lang.String toString()
toString
in class java.lang.Object
public static void main(java.lang.String[] argv)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |