Sunday, January 6, 2013

Convert Weka Instances to Mallet InstanceList

If you ever need to convert Weka Instances into a Mallet InstanceList, the following code snippet may help you:
 /**
    * Converts Weka Instances to a Mallet InstanceList.
    *
    * <p>Every non-class Weka attribute becomes one entry of the Mallet data
    * alphabet (keyed by attribute name) and every value of the class attribute
    * becomes one entry of the target alphabet. Each Weka instance is turned
    * into a sparse FeatureVector that keeps only the non-zero, non-class
    * values, labelled with the string value of its class attribute.
    *
    * @param instances Weka instances; the class attribute must be set
    * @return Mallet instanceList
    * @throws IllegalArgumentException if no class attribute is set on {@code instances}
    */
   public static InstanceList wekaInstances2MalletInstanceList(Instances instances) {
     int classIndex = instances.classIndex();
     if (classIndex < 0) {
       // Fail early with a clear message instead of an opaque index error below.
       throw new IllegalArgumentException(
           "Weka instances have no class attribute set (classIndex = " + classIndex + ")");
     }

     Alphabet dataAlphabet = new Alphabet();
     LabelAlphabet targetAlphabet = new LabelAlphabet();
     InstanceList instanceList = new InstanceList(new Noop(dataAlphabet, targetAlphabet));

     // The data alphabet skips the class attribute, so alphabet indices differ
     // from Weka attribute indices for every attribute after the class
     // attribute. Remember the alphabet index of each Weka attribute.
     int numAttributes = instances.numAttributes();
     int[] featureIndexOf = new int[numAttributes];
     for (int i = 0; i < numAttributes; i++) {
       if (i == classIndex) {
         featureIndexOf[i] = -1; // the class attribute is not a feature
         continue;
       }
       Attribute attribute = instances.attribute(i);
       featureIndexOf[i] = dataAlphabet.lookupIndex(attribute.name());
     }

     // Register all class values up front so label indices follow Weka's order.
     Attribute classAttribute = instances.attribute(classIndex);
     int numClasses = classAttribute.numValues();
     for (int i = 0; i < numClasses; i++) {
       targetAlphabet.lookupLabel(classAttribute.value(i));
     }

     int numInstance = instances.numInstances();
     for (int i = 0; i < numInstance; i++) {
       weka.core.Instance instance = instances.instance(i);
       double[] values = instance.toDoubleArray();
       int[] indices = new int[values.length];
       int count = 0;
       // Compact the non-zero, non-class values to the front of the arrays.
       // Writing in place is safe because count never exceeds j.
       // NOTE(review): a NaN value also passes the != 0.0 test, so missing
       // Weka values (presumably encoded as NaN) would be kept as features;
       // confirm whether they should be skipped.
       for (int j = 0; j < values.length; j++) {
         if (j != classIndex && values[j] != 0.0) {
           values[count] = values[j];
           indices[count] = featureIndexOf[j];
           count++;
         }
       }
       indices = Arrays.copyOf(indices, count);
       values = Arrays.copyOf(values, count);
       FeatureVector fv = new FeatureVector(dataAlphabet, indices, values);
       String classValue = instance.stringValue(classIndex);
       Label classLabel = targetAlphabet.lookupLabel(classValue);
       Instance malletInstance = new Instance(fv, classLabel, null, null);
       instanceList.addThruPipe(malletInstance);
     }
     return instanceList;
   }