# Parse Snort alert lines (as forwarded through syslog) with a pyparsing
# grammar and dump the extracted fields for each sample line in ``testdata``.
import string

from pyparsing import (
    alphas,
    nums,
    alphanums,
    Literal,
    Combine,
    Word,
    Group,
    Suppress,
    OneOrMore,
    delimitedList,
    ZeroOrMore,
    Optional,
)

testdata = """
<133> Apr 1 00:00:00 server1 snort[32268]: [1:1983:6] BACKDOOR DeepThroat 3.1 Connection attempt [Classification: A Network Trojan was detected] [Priority: 1]: {UDP} 10.1.1.1:161 -> 192.168.1.1:4120
"""

# Module-level cache so the grammar is only constructed once.
logLineBNF = None


def getLogLineBNF():
    """Return the pyparsing grammar for one Snort syslog line.

    The grammar is built on the first call and cached in the module-level
    ``logLineBNF``; later calls return the cached object.

    Named results produced by the grammar: ``timestamp``, ``host``,
    ``snort_id``, ``description``, ``classification``, ``proto``, ``src``,
    ``src_prt``, ``dst`` and ``dst_prt``.  The remaining names (``code``,
    ``misc``, ``priority``, ``asdf``, ``out``) are attached to suppressed
    elements and therefore carry no tokens.
    """
    global logLineBNF
    if logLineBNF is not None:
        return logLineBNF

    integer = Word(nums)
    # Dotted quad, re-joined into a single token; shared by src and dst.
    ip_address = delimitedList(integer, ".", combine=True)

    # Leading syslog tag such as "<133> "; matched but dropped.
    code = Suppress(Group(Combine('<' + Word(nums) + '>' + ' ')))

    # Three-letter month: one uppercase letter followed by lowercase ones.
    # ascii_uppercase/ascii_lowercase exist on both Python 2 and Python 3,
    # unlike string.uppercase/string.lowercase.
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    serverDateTime = Group(Combine(
        month + ' ' + integer + ' ' + integer + ":" + integer + ":" + integer
    ))
    serverName = Word(alphanums)

    # Process tag such as "snort[32268]:"; matched but dropped.
    misc_code = Suppress(Combine(
        Suppress(Word(alphas) + '[') + Word(nums) + Suppress(']' + ':')
    ))

    # "[gid:sid:rev]" -> "gid:sid" (the trailing ":rev]" is suppressed).
    snortId = Combine(
        Suppress('[') + Word(nums) + ':' + Word(nums)
        + Suppress(':' + Word(nums) + ']')
    )

    description = (
        ZeroOrMore('(' + Word(alphanums + '.-_ []') + ')')
        + OneOrMore(Word(alphanums + '-./>!:$_ '))
        + ZeroOrMore(Suppress('[') + Word(nums) + ']')
    )

    # One or more "[Label: words]" groups; the parse action keeps only the
    # second-to-last token of everything matched.
    # NOTE(review): for the sample line the groups are "[Classification: ...]"
    # and "[Priority: 1]", so the value kept is the priority number "1", not
    # the classification text.  Left as-is to avoid changing results for
    # existing consumers -- confirm the intended field.
    classification = OneOrMore(
        '[' + Suppress(Word(alphas) + ': ') + OneOrMore(Word(alphanums)) + ']'
    ).setParseAction(lambda tokens: tokens[-2])

    # Consumes the ':' that follows the bracketed groups.
    priority = Suppress(':')

    # Optional "<tag> " element.  It used to be mandatory, which made the
    # sample line in ``testdata`` (which has no such tag) fail to parse.
    code_1 = Optional(
        Suppress(Group(Combine('<' + Word(alphanums) + '>' + ' ')))
    )

    proto = Suppress('{') + Word(alphanums + ':') + Suppress('}')

    # ":port" after an address, or '_' when the line has no port.  The inner
    # expression must be able to fail for Optional's default to be used;
    # wrapping a ZeroOrMore (which always succeeds) silently disabled it.
    src_prt = Optional(Suppress(':') + Word(nums), default='_')
    dst_prt = Optional(Suppress(':') + Word(nums), default='_')

    out = Suppress(Literal("->"))

    logLineBNF = (
        code("code")
        + serverDateTime("timestamp")
        + serverName("host")
        + misc_code("misc")
        + snortId("snort_id")
        + description("description")
        + classification("classification")
        + priority("priority")
        + code_1("asdf")
        + proto("proto")
        + ip_address("src")
        + src_prt("src_prt")
        + out("out")
        + ip_address("dst")
        + dst_prt("dst_prt")
    )
    return logLineBNF


for line in testdata.splitlines():
    # Skip blank and whitespace-only lines instead of handing them to the
    # parser, where they would raise a ParseException.
    if not line.strip():
        continue
    fields = getLogLineBNF().parseString(line)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(fields.dump())