Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

from mtools.util import OrderedDict 

import re 

 

class Grouping(object):
    """ General purpose class to group items by certain criteria.

        Items are stored in self.groups, a mapping from group key to the list of
        items that produced this key. The criterion (group_by) can be

          * a callable: the key is group_by(item)
          * a string naming an attribute of the item: the key is the attribute value
          * any other string: it is used as a regular expression that is searched in
            str(item); the key is the first capture group if the pattern defines
            groups, the whole match otherwise

        Items for which no key can be determined are collected under the key None.
    """

    def __init__(self, iterable=None, group_by=None):
        """ creates the grouping and optionally fills it with the items of iterable.

            iterable -- items to add right away (optional)
            group_by -- default criterion used by add() and regroup()
        """
        self.groups = {}
        self.group_by = group_by

        # explicit None test, so iterables without a usable truth value work as well
        if iterable is not None:
            for item in iterable:
                self.add(item, group_by)

    def add(self, item, group_by=None):
        """ adds a single item to the group its key maps to.

            item     -- the object to store
            group_by -- criterion for this call only, falls back to self.group_by
                        if omitted (or otherwise falsy)

            The key is None if there is no criterion or if the criterion does not
            apply to the item.
        """
        if not group_by:
            group_by = self.group_by

        key = None
        if group_by:
            if callable(group_by):
                # if group_by is a function, use it with item as argument
                key = group_by(item)
            elif isinstance(group_by, str):
                if hasattr(item, group_by):
                    # the item has an attribute of that name, use its value as key
                    key = getattr(item, group_by)
                else:
                    # try to match str(item) with regular expression
                    match = re.search(group_by, str(item))
                    if match:
                        key = match.group(1) if match.groups() else match.group()

        self.groups.setdefault(key, []).append(item)

    def __getitem__(self, key):
        """ returns the list of items stored under key, raises KeyError if missing. """
        return self.groups[key]

    def __iter__(self):
        """ iterates over the group keys. """
        return iter(self.groups)

    def __len__(self):
        """ returns the number of groups (not the number of items). """
        return len(self.groups)

    def keys(self):
        """ returns the group keys, as provided by the underlying mapping. """
        return self.groups.keys()

    def values(self):
        """ returns the item lists, as provided by the underlying mapping. """
        return self.groups.values()

    def items(self):
        """ returns the (key, item list) pairs, as provided by the underlying mapping. """
        return self.groups.items()

    def regroup(self, group_by=None):
        """ redistributes all items according to group_by.

            Falls back to self.group_by if group_by is omitted; self.group_by itself
            is not changed. self.groups is rebuilt as a plain dict.
        """
        if not group_by:
            group_by = self.group_by

        old_groups = self.groups
        self.groups = {}

        for members in old_groups.values():
            for item in members:
                self.add(item, group_by)

    def move_items(self, from_group, to_group):
        """ will take all elements from the from_group and add it to the to_group.

            from_group is removed afterwards, to_group is created if needed. Nothing
            happens if from_group does not exist, is empty, or is to_group itself.
        """
        # moving a group onto itself would duplicate its items and then delete
        # the group, losing all of them
        if from_group == to_group:
            return

        if not self.groups.get(from_group):
            return

        self.groups.setdefault(to_group, []).extend(self.groups.pop(from_group))

    def sort_by_size(self, group_limit=None, discard_others=False, others_label='others'):
        """ sorts the groups by the number of elements they contain, descending.

            group_limit    -- maximum number of groups to keep (None keeps all). Unless
                              discard_others is set, the limit includes the others
                              group: the group_limit - 1 largest groups are kept and
                              all remaining groups are merged into the group named
                              others_label.
            discard_others -- keep the group_limit largest groups and drop the
                              remaining ones; any group named others_label is removed
                              as well, even if no limit is given
            others_label   -- key of the group that collects the remaining items

            self.groups is replaced by an OrderedDict in the new order.
        """
        # sort groups by number of elements; items() and list(keys()) behave the
        # same on Python 2 and 3, unlike iteritems() and slicing keys() directly
        by_size = sorted(self.groups.items(), key=lambda pair: len(pair[1]), reverse=True)
        self.groups = OrderedDict(by_size)

        # if group-limit is provided, combine remaining groups
        if group_limit is not None:
            keep = group_limit if discard_others else group_limit - 1
            # clamp, so a limit below one cannot wrap around as a negative index
            surplus = list(self.groups.keys())[max(keep, 0):]

            if discard_others:
                for key in surplus:
                    del self.groups[key]
            else:
                others = self.groups.setdefault(others_label, [])
                for key in surplus:
                    # an already existing others group must not be merged into
                    # itself (and deleted afterwards)
                    if key == others_label:
                        continue
                    others.extend(self.groups.pop(key))

                # remove if empty
                if not others:
                    del self.groups[others_label]

        # remove others group regardless of limit if requested
        if discard_others and others_label in self.groups:
            del self.groups[others_label]

 

 

 

if __name__ == '__main__':
    # Example: group numbers with a regular expression, keep only the largest
    # group and regroup its items into even and odd numbers.
    items = [1, 4, 3, 5, 7, 8, 6, 7, 9, 8, 6, 4, 2, 3, 3, 0]

    # the character class matches the digits 3 to 7 in str(item) (the blanks and
    # commas in it never occur there); all other items share the key None
    grouping = Grouping(items, r'[3, 4, 5, 6, 7]')
    grouping.sort_by_size(group_limit=1, discard_others=True)
    # grouping.move_items('no match', 'foo')

    grouping.regroup(lambda x: 'even' if x % 2 == 0 else 'odd')

    for g in grouping:
        # one formatted string instead of a print statement: identical output on
        # Python 2, and valid syntax on Python 3 as well
        print('%s %s' % (g, grouping[g]))