If you ever come across this issue, the following code snippet may help you:
/**
 * Converts Weka Instances to Mallet InstanceList.
 *
 * <p>Every non-class attribute name is registered as a feature in a fresh data
 * alphabet, and every value of the class attribute is registered as a label in a
 * fresh label alphabet. Each Weka instance becomes one Mallet instance whose data
 * is a sparse feature vector holding only the non-zero, non-class attribute values
 * and whose target is the label looked up from the instance's class value.
 *
 * @param instances Weka instances; a class index must be set on them
 * @return Mallet instanceList built over a {@code Noop} pipe that carries the two alphabets
 * @throws IllegalArgumentException if no class index is set on {@code instances}
 */
public static InstanceList wekaInstances2MalletInstanceList(Instances instances) {
    int classIndex = instances.classIndex();
    if (classIndex < 0) {
        // Fail early with a clear message instead of an index error further down.
        throw new IllegalArgumentException(
                "Class index is not set on the Weka instances (classIndex=" + classIndex + ")");
    }

    Alphabet dataAlphabet = new Alphabet();
    LabelAlphabet targetAlphabet = new LabelAlphabet();
    InstanceList instanceList = new InstanceList(new Noop(dataAlphabet, targetAlphabet));

    // Register every non-class attribute as a feature and remember which alphabet
    // index it received. Because the class attribute is skipped, the alphabet index
    // differs from the Weka attribute index for attributes that follow the class.
    int numAttributes = instances.numAttributes();
    int[] featureIndex = new int[numAttributes];
    for (int i = 0; i < numAttributes; i++) {
        if (i == classIndex) {
            featureIndex[i] = -1; // the class attribute is never used as a feature
            continue;
        }
        Attribute attribute = instances.attribute(i);
        featureIndex[i] = dataAlphabet.lookupIndex(attribute.name());
    }

    // Register all class values up front so label indices follow the Weka value order.
    Attribute classAttribute = instances.attribute(classIndex);
    int numClasses = classAttribute.numValues();
    for (int i = 0; i < numClasses; i++) {
        targetAlphabet.lookupLabel(classAttribute.value(i));
    }

    int numInstance = instances.numInstances();
    for (int i = 0; i < numInstance; i++) {
        weka.core.Instance instance = instances.instance(i);
        double[] attributeValues = instance.toDoubleArray();

        // Collect the non-zero feature values, keyed by alphabet index
        // (not by the raw Weka attribute index).
        int[] indices = new int[numAttributes];
        double[] values = new double[numAttributes];
        int count = 0;
        for (int j = 0; j < attributeValues.length; j++) {
            if (j != classIndex && attributeValues[j] != 0.0) {
                indices[count] = featureIndex[j];
                values[count] = attributeValues[j];
                count++;
            }
        }
        indices = Arrays.copyOf(indices, count);
        values = Arrays.copyOf(values, count);

        FeatureVector fv = new FeatureVector(dataAlphabet, indices, values);
        String classValue = instance.stringValue(classIndex);
        Label classLabel = targetAlphabet.lookupLabel(classValue);
        Instance malletInstance = new Instance(fv, classLabel, null, null);
        instanceList.addThruPipe(malletInstance);
    }
    return instanceList;
}
No comments:
Post a Comment