java - Getting incorrect Score using SentiWordNet -
i'm doing sentiment analysis using sentiwordnet , referred post here how use sentiwordnet . however, i'm getting score of 0.0 despite trying out various inputs. there i'm doing wrong here? thanks!
import java.io.bufferedreader; import java.io.file; import java.io.filereader; import java.util.hashmap; import java.util.iterator; import java.util.set; import java.util.vector; public class swn3 { private string pathtoswn = "c:\\users\\malcolm\\desktop\\sentiwordnet_3.0.0\\home\\swn\\www\\admin\\dump\\sentiwordnet_3.0.0.txt"; private hashmap<string, double> _dict; public swn3(){ _dict = new hashmap<string, double>(); hashmap<string, vector<double>> _temp = new hashmap<string, vector<double>>(); try{ bufferedreader csv = new bufferedreader(new filereader(pathtoswn)); string line = ""; while((line = csv.readline()) != null) { string[] data = line.split("\t"); double score = double.parsedouble(data[2])-double.parsedouble(data[3]); string[] words = data[4].split(" "); for(string w:words) { string[] w_n = w.split("#"); w_n[0] += "#"+data[0]; int index = integer.parseint(w_n[1])-1; if(_temp.containskey(w_n[0])) { vector<double> v = _temp.get(w_n[0]); if(index>v.size()) for(int = v.size();i<index; i++) v.add(0.0); v.add(index, score); _temp.put(w_n[0], v); } else { vector<double> v = new vector<double>(); for(int = 0;i<index; i++) v.add(0.0); v.add(index, score); _temp.put(w_n[0], v); } } } set<string> temp = _temp.keyset(); (iterator<string> iterator = temp.iterator(); iterator.hasnext();) { string word = (string) iterator.next(); vector<double> v = _temp.get(word); double score = 0.0; double sum = 0.0; for(int = 0; < v.size(); i++) score += ((double)1/(double)(i+1))*v.get(i); for(int = 1; i<=v.size(); i++) sum += (double)1/(double)i; score /= sum; string sent = ""; if(score>=0.75) sent = "strong_positive"; else if(score > 0.25 && score<=0.5) sent = "positive"; else if(score > 0 && score>=0.25) sent = "weak_positive"; else if(score < 0 && score>=-0.25) sent = "weak_negative"; else if(score < -0.25 && score>=-0.5) sent = "negative"; else if(score<=-0.75) sent = "strong_negative"; _dict.put(word, score); } } catch(exception e){e.printstacktrace();} } public double extract(string word) { double total = new double(0); if(_dict.get(word+"#n") != null) total = _dict.get(word+"#n") + total; if(_dict.get(word+"#a") != null) total = _dict.get(word+"#a") + total; if(_dict.get(word+"#r") != null) total = _dict.get(word+"#r") + total; if(_dict.get(word+"#v") != null) total = _dict.get(word+"#v") + total; return total; } public static void main(string[] args) { swn3 test = new swn3(); string sentence="hello have super awesome great day"; string[] words = sentence.split("\\s+"); double totalscore = 0; for(string word : words) { word = word.replaceall("([^a-za-z\\s])", ""); if (test.extract(word) == null) continue; totalscore += test.extract(word); } system.out.println(totalscore); } } here's first 10 lines of sentiwordnet.txt
a 00001740 0.125 0 able#1 (usually followed `to') having necessary means or skill or know-how or authority something; "able swim"; "she able program computer"; "we @ last able buy car"; "able grant project" 00002098 0 0.75 unable#1 (usually followed `to') not having necessary means or skill or know-how; "unable town without car"; "unable obtain funds" 00002312 0 0 dorsal#2 abaxial#1 facing away axis of organ or organism; "the abaxial surface of leaf underside or side facing away stem" 00002527 0 0 ventral#2 adaxial#1 nearest or facing toward axis of organ or organism; "the upper side of leaf known adaxial surface" 00002730 0 0 acroscopic#1 facing or on side toward apex 00002843 0 0 basiscopic#1 facing or on side toward base 00002956 0 0 abducting#1 abducent#1 of muscles; drawing away midline of body or adjacent part 00003131 0 0 adductive#1 adducting#1 adducent#1 of muscles; bringing or drawing toward midline of body or toward adjacent part 00003356 0 0 nascent#1 being born or beginning; "the nascent chicks"; "a nascent insurgency" 00003553 0 0 emerging#2 emergent#2 coming existence; "an emergent republic"
usually sentiword.txt file comes weird format.
you need remove first part of (which includes comments , instructions) , last 2 lines:
# empty line the parser doesn't know how handle these situations, if delete these 2 lines you'll fine.
Comments
Post a Comment