Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

import cPickle 

import os 

import re 

import sys 

import argparse 

from collections import defaultdict 

from itertools import chain, izip_longest 

 

 

from mtools.util.logcodeline import LogCodeLine 

import mtools 

 

def import_l2c_db():
    """ static import helper function, loads the log2code database.

        Looks for ``log2code.pickle`` in the ``data`` directory that sits
        next to the ``mtools`` package and unpickles it.

        Returns the four objects stored in the pickle, in the order they
        were stored. Log2CodeConverter binds them to ``all_versions``,
        ``log_version``, ``logs_by_word`` and ``log_code_lines``.

        Raises ImportError if ``log2code.pickle`` does not exist in the
        data directory.
    """
    data_path = os.path.join(os.path.dirname(mtools.__file__), 'data')
    pickle_path = os.path.join(data_path, 'log2code.pickle')

    # check first, so that a missing database surfaces as an ImportError
    if not os.path.exists(pickle_path):
        raise ImportError('log2code.pickle not found in %s.'%data_path)

    # NOTE: unpickling can execute arbitrary code. This is only acceptable
    # because the file is read from mtools' own data directory; never point
    # this at a pickle from an untrusted source.
    with open(pickle_path, 'rb') as pickle_file:
        # the context manager closes the handle even if unpickling fails
        av, lv, lbw, lcl = cPickle.load(pickle_file)

    return av, lv, lbw, lcl

 

 

 

 

class Log2CodeConverter(object):
    """ Looks up which known log message pattern a log line came from.

        Calling an instance with a log line returns a tuple of
        (matching ``log_code_lines`` entry or None, variable parts or None).
        The lookup tables are loaded once, when the class is created, via
        import_l2c_db().
    """

    # static import of logdb data structures
    # logs_by_word:   word -> list of patterns (each an iterable of static
    #                 string tokens) that are looked up by that word
    # log_code_lines: pattern -> stored code line information
    all_versions, log_version, logs_by_word, log_code_lines = import_l2c_db()

    def _log2code(self, line):
        """ returns the log_code_lines entry that best matches the line,
            or None if no known pattern matches.

            The "best" pattern is the one whose tokens all occur in the
            line and whose tokens cover the most characters. On a tie the
            first pattern in the list wins.
        """
        tokens = re.split(r'[\s"]', line)

        # only tokens that are known lookup words are considered (and
        # counted) as candidates
        candidates = (w for w in tokens if w in self.logs_by_word)

        for word_no, word in enumerate(candidates):

            if word_no > 20:
                # avoid parsing really long lines. If the log message didn't
                # show up within the first candidate words, it's probably not
                # a known message. Checked before the coverage scan, because
                # from here on the result could only ever be None.
                return None

            # go through all patterns stored under this word and keep the
            # one with the highest coverage
            best_cov = 0
            best_match = None
            for log in self.logs_by_word[word]:
                if all(token in line for token in log):
                    # all tokens match, calculate coverage
                    cov = sum(len(token) for token in log)
                    # strict comparison keeps the first pattern on ties
                    if cov > best_cov:
                        best_cov = cov
                        best_match = log

            if best_match is None:
                # nothing matched for this word (or the word has no
                # patterns at all), try the next candidate word
                continue

            return self.log_code_lines[best_match]

        return None

    def _strip_counters(self, sub_line):
        """ finds the ending part of the codeline by
            taking out the counters and durations

            Everything after the last '}' is cut off. A string without
            any '}' is returned unchanged.
        """
        end = sub_line.rfind('}')
        if end == -1:
            return sub_line
        return sub_line[:end + 1]

    def _strip_datetime(self, sub_line):
        """ strip out datetime and other parts so that
            there is no redundancy

            Everything up to and including the first ']' is cut off. A
            string without any ']' is returned unchanged.
        """
        begin = sub_line.find(']')
        if begin == -1:
            return sub_line
        # may be an empty string; it is still needed as a placeholder
        # when interleaving the lists in combine()
        return sub_line[begin + 1:]

    def _find_variable(self, pattern, logline):
        """ return the variable parts of the logline, given a tuple of
            static strings (pattern) that occur in it.

            The result starts with the text before the first pattern
            element (datetime part stripped), followed by the text found
            between each pair of neighbouring pattern elements, and ends
            with the text after the last pattern element (counters and
            durations stripped).

            A gap whose two neighbours cannot be found in that order is
            skipped, so the result can be shorter than len(pattern) + 1.
            Each gap is searched greedily over the whole line,
            independent of the other gaps.

            Raises ValueError if the first or last pattern element is not
            part of the logline.
        """
        var_subs = []

        # everything before the first static part, minus the datetime
        first_index = logline.index(pattern[0])
        var_subs.append(self._strip_datetime(logline[:first_index]))

        for patt, patt_next in zip(pattern[:-1], pattern[1:]):
            # regular expression that captures what's in the middle of
            # two neighbouring static parts
            pat = re.escape(patt) + '(.*)' + re.escape(patt_next)
            between = re.search(pat, logline)
            # best effort: only add what's in between if it was found
            if between is not None:
                var_subs.append(between.group(1))

        # everything after the last static part, minus the counters
        # and durations
        rest_of_string = logline.rindex(pattern[-1]) + len(pattern[-1])
        var_subs.append(self._strip_counters(logline[rest_of_string:]))

        # whitespace is deliberately kept, so that combine() can rebuild
        # the line
        return var_subs

    def _variable_parts(self, line, codeline):
        """ returns the variable parts of the line, given the codeline
            that holds the static parts in its ``pattern`` attribute.

            Without a codeline, the whole line (datetime and counters
            stripped, surrounding whitespace removed) is returned as the
            single variable part.
        """
        if codeline:
            return self._find_variable(codeline.pattern, line)

        # no known pattern: the variable part is the line string without
        # all the other stuff
        line_str = self._strip_datetime(self._strip_counters(line))
        return [line_str.strip()]

    def __call__(self, line, variable=False):
        """ returns a tuple of the log2code and variable parts
            when the class is called

            The second element is None unless ``variable`` is true.
        """
        log2code = self._log2code(line)
        if not variable:
            return log2code, None
        return log2code, self._variable_parts(line, log2code)

    def combine(self, pattern, variable):
        """ combines a pattern and variable parts to be a line string again.

            The elements are interleaved starting with the first variable
            part; the shorter sequence is padded with empty strings.
        """
        inter_zip = izip_longest(variable, pattern, fillvalue='')
        return ''.join(elt for pair in inter_zip for elt in pair)

 

 

 

 

 

 

# class MLog2Code(object): 

 

#     def __init__(self): 

#         self._import_l2c_db() 

#         self._parse_args() 

#         self.analyse() 

 

#     def _import_l2c_db(self): 

#         self.all_versions, self.logs_versions, self.logs_by_word, self.log_code_lines = \ 

#             cPickle.load(open('./logdb.pickle', 'rb')) 

 

#     def _parse_args(self): 

#         # create parser object 

#         parser = argparse.ArgumentParser(description='mongod/mongos log file to code line converter (BETA)') 

 

#         # only create default argument if not using stdin 

#         if sys.stdin.isatty(): 

#             parser.add_argument('logfile', action='store', help='looks up and prints out information about where a log line originates from the code.') 

 

#         self.args = vars(parser.parse_args()) 

 

#     def analyse(self): 

#         # open logfile 

#         if sys.stdin.isatty(): 

#             logfile = open(self.args['logfile'], 'r') 

#         else: 

#             logfile = sys.stdin 

 

#         for i, line in enumerate(logfile):  

#             match = self.log2code(line) 

 

#             if  match: 

#                 print line, 

#                 print self.logs_versions[match] 

#                 print self.log_code_lines[match] 

 

 

#     def log2code(self, line): 

#         tokens = line.split() 

 

#         # find first word in line that has a corresponding log message stored 

#         word = next((w for w in tokens if w in self.logs_by_word), None) 

#         if not word: 

#             return None 

 

#         # go through all error messages starting with this word 

#         coverage = [] 

#         for log in self.logs_by_word[word]: 

 

#             if all([line.find(token) >= 0 for token in log]): 

#                 # all tokens match, calculate coverage 

#                 cov = sum([len(token) for token in log]) 

#                 coverage.append(cov) 

#             else: 

#                 coverage.append(0) 

 

#         best_cov = max(coverage) 

#         if not best_cov: 

#             return None 

 

#         best_match = self.logs_by_word[word][coverage.index(best_cov)] 

#         return best_match 

 

 

 

# if __name__ == '__main__': 

#         l2cc = Log2CodeConverter() 

#         lcl = l2cc("""Sun Mar 24 00:44:16.295 [conn7815] moveChunk migrate commit accepted by TO-shard: { active: true, ns: "db.coll", from: "shard001:27017", min: { i: ObjectId('4b7730748156791f310b03a3'), m: "stats", t: new Date(1348272000000) }, max: { i: ObjectId('4b8f826192f9e2154d05dda7'), m: "mongo", t: new Date(1345680000000) }, shardKeyPattern: { i: 1.0, m: 1.0, t: 1.0 }, state: "done", counts: { cloned: 3115, clonedBytes: 35915282, catchup: 0, steady: 0 }, ok: 1.0 }""") 

#         print lcl.versions 

 

        #possible_versions = possible_versions & set(logs_versions[best_match]) 

 

 

        # if len(possible_versions) != old_num_v: 

        #     print i, line.rstrip() 

        #     print "    best_match:", best_match 

        #     print "    log message only present in versions:", logs_versions[best_match] 

        #     print "    this limits the possible versions to:", possible_versions 

        #     print 

 

        # if not possible_versions: 

        #     raise SystemExit 

 

 

    # print "possible versions:", ", ".join([pv[1:] for pv in possible_versions]) 

    # for pv in possible_versions: 

    #     print pv, possible_versions[pv] 

 

    # plt.bar(range(len(possible_versions.values())), possible_versions.values(), align='center') 

    # plt.xticks(range(len(possible_versions.keys())), possible_versions.keys(), size='small', rotation=90) 

    # plt.show()