1
2
3
4
5
6
7
8
9
10
11
12#include <stdio.h>
13#include <stdlib.h>
14#include <sys/types.h>
15#include <sys/stat.h>
16#include <fcntl.h>
17#include <unistd.h>
18#include <string.h>
19
/*
 * One distinct block of text together with its occurrence count.
 *
 * txt - heap-allocated, NUL-terminated copy of the block's text
 * len - number of bytes in txt, not counting the terminator
 * num - number of times this block has been seen
 */
struct block_list {
	char	*txt;
	int	len;
	int	num;
};
25
26
/* Table of collected blocks and its bookkeeping. */
static struct block_list *list;	/* entry storage, allocated by main() */
static int list_size;		/* number of entries of list in use */
static int max_size;		/* capacity of list, in entries */

/* No use of block_head is visible in this part of the file. */
struct block_list *block_head;
32
/*
 * Read one block of text from fin into buf.
 *
 * Lines are appended to buf one after another until an empty line (a line
 * consisting of just '\n') is read. Lines beginning with "PFN" are discarded
 * instead of being stored.
 *
 * buf      - destination buffer
 * buf_size - capacity of buf in bytes
 * fin      - stream to read from
 *
 * Returns the number of bytes collected before the empty line, or -1 when
 * fgets() returns NULL (end of file or read error) or when buf has no room
 * left for another line. A block still being collected at that point is not
 * reported.
 */
int read_block(char *buf, int buf_size, FILE *fin)
{
	char *const limit = buf + buf_size;
	char *pos = buf;

	for (;;) {
		/* fgets() needs room for at least one character plus the NUL. */
		if (limit - pos <= 1)
			break;
		if (fgets(pos, limit - pos, fin) == NULL)
			break;

		/* An empty line terminates the block. */
		if (pos[0] == '\n')
			return pos - buf;

		/* Keep the line unless it starts with "PFN"; a skipped line
		 * is simply overwritten by the next read. */
		if (strncmp(pos, "PFN", 3) != 0)
			pos += strlen(pos);
	}

	return -1;
}
47
48static int compare_txt(const void *p1, const void *p2)
49{
50 const struct block_list *l1 = p1, *l2 = p2;
51
52 return strcmp(l1->txt, l2->txt);
53}
54
55static int compare_num(const void *p1, const void *p2)
56{
57 const struct block_list *l1 = p1, *l2 = p2;
58
59 return l2->num - l1->num;
60}
61
62static void add_list(char *buf, int len)
63{
64 if (list_size != 0 &&
65 len == list[list_size-1].len &&
66 memcmp(buf, list[list_size-1].txt, len) == 0) {
67 list[list_size-1].num++;
68 return;
69 }
70 if (list_size == max_size) {
71 printf("max_size too small??\n");
72 exit(1);
73 }
74 list[list_size].txt = malloc(len+1);
75 list[list_size].len = len;
76 list[list_size].num = 1;
77 memcpy(list[list_size].txt, buf, len);
78 list[list_size].txt[len] = 0;
79 list_size++;
80 if (list_size % 1000 == 0) {
81 printf("loaded %d\r", list_size);
82 fflush(stdout);
83 }
84}
85
/* Size in bytes of the buffer main() allocates to hold one block of text. */
#define BUF_SIZE	(128 * 1024)
87
88int main(int argc, char **argv)
89{
90 FILE *fin, *fout;
91 char *buf;
92 int ret, i, count;
93 struct block_list *list2;
94 struct stat st;
95
96 if (argc < 3) {
97 printf("Usage: ./program <input> <output>\n");
98 perror("open: ");
99 exit(1);
100 }
101
102 fin = fopen(argv[1], "r");
103 fout = fopen(argv[2], "w");
104 if (!fin || !fout) {
105 printf("Usage: ./program <input> <output>\n");
106 perror("open: ");
107 exit(1);
108 }
109
110 fstat(fileno(fin), &st);
111 max_size = st.st_size / 100;
112
113 list = malloc(max_size * sizeof(*list));
114 buf = malloc(BUF_SIZE);
115 if (!list || !buf) {
116 printf("Out of memory\n");
117 exit(1);
118 }
119
120 for ( ; ; ) {
121 ret = read_block(buf, BUF_SIZE, fin);
122 if (ret < 0)
123 break;
124
125 add_list(buf, ret);
126 }
127
128 printf("loaded %d\n", list_size);
129
130 printf("sorting ....\n");
131
132 qsort(list, list_size, sizeof(list[0]), compare_txt);
133
134 list2 = malloc(sizeof(*list) * list_size);
135 if (!list2) {
136 printf("Out of memory\n");
137 exit(1);
138 }
139
140 printf("culling\n");
141
142 for (i = count = 0; i < list_size; i++) {
143 if (count == 0 ||
144 strcmp(list2[count-1].txt, list[i].txt) != 0) {
145 list2[count++] = list[i];
146 } else {
147 list2[count-1].num += list[i].num;
148 }
149 }
150
151 qsort(list2, count, sizeof(list[0]), compare_num);
152
153 for (i = 0; i < count; i++)
154 fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
155
156 return 0;
157}
158