1
2
3
4
5
6
7
8
9
10#include <stdio.h>
11#include <stdlib.h>
12#include <sys/types.h>
13#include <sys/stat.h>
14#include <fcntl.h>
15#include <unistd.h>
16#include <string.h>
17
/*
 * One distinct text block together with how often it was seen.
 */
struct block_list {
	char *txt;	/* NUL-terminated copy of the block text */
	int len;	/* length of txt in bytes, terminator excluded */
	int num;	/* number of occurrences counted for this text */
};
23
24
/* Table of blocks collected by add_list(); allocated in main(). */
static struct block_list *list = NULL;
/* Number of entries of list currently in use. */
static int list_size = 0;
/* Number of entries list has room for. */
static int max_size = 0;

/* Not referenced by the code visible in this file. */
struct block_list *block_head;
30
/*
 * Read one block of text from fin into buf.
 *
 * A block is a run of lines terminated by an empty line (a line that
 * consists of just '\n').  The terminating empty line is consumed but is
 * not counted in the returned length.  A final block that is ended by
 * end-of-file instead of an empty line is returned as well.
 *
 * buf      - destination buffer
 * buf_size - size of buf in bytes
 * fin      - stream to read from
 *
 * Returns the length of the block in bytes (possibly 0), or -1 when there
 * is nothing left to read, when a read error occurred, or when the block
 * does not fit into buf.  Callers should rely on the returned length, not
 * on a terminator, to find the end of the block.
 */
int read_block(char *buf, int buf_size, FILE *fin)
{
	char *curr = buf, *const buf_end = buf + buf_size;

	while (buf_end - curr > 1) {
		if (!fgets(curr, (int)(buf_end - curr), fin)) {
			/*
			 * End of input: hand back whatever was gathered so
			 * the last block is not lost when the file lacks a
			 * trailing empty line.  Data collected before a read
			 * error is not trusted.
			 */
			if (curr > buf && !ferror(fin))
				return (int)(curr - buf);
			return -1;
		}
		if (*curr == '\n')
			return (int)(curr - buf);
		curr += strlen(curr);
	}

	/* Ran out of buffer space before the block ended. */
	return -1;
}
43
44static int compare_txt(const void *p1, const void *p2)
45{
46 const struct block_list *l1 = p1, *l2 = p2;
47
48 return strcmp(l1->txt, l2->txt);
49}
50
51static int compare_num(const void *p1, const void *p2)
52{
53 const struct block_list *l1 = p1, *l2 = p2;
54
55 return l2->num - l1->num;
56}
57
58static void add_list(char *buf, int len)
59{
60 if (list_size != 0 &&
61 len == list[list_size-1].len &&
62 memcmp(buf, list[list_size-1].txt, len) == 0) {
63 list[list_size-1].num++;
64 return;
65 }
66 if (list_size == max_size) {
67 printf("max_size too small??\n");
68 exit(1);
69 }
70 list[list_size].txt = malloc(len+1);
71 list[list_size].len = len;
72 list[list_size].num = 1;
73 memcpy(list[list_size].txt, buf, len);
74 list[list_size].txt[len] = 0;
75 list_size++;
76 if (list_size % 1000 == 0) {
77 printf("loaded %d\r", list_size);
78 fflush(stdout);
79 }
80}
81
82#define BUF_SIZE (128 * 1024)
83
84int main(int argc, char **argv)
85{
86 FILE *fin, *fout;
87 char *buf;
88 int ret, i, count;
89 struct block_list *list2;
90 struct stat st;
91
92 if (argc < 3) {
93 printf("Usage: ./program <input> <output>\n");
94 perror("open: ");
95 exit(1);
96 }
97
98 fin = fopen(argv[1], "r");
99 fout = fopen(argv[2], "w");
100 if (!fin || !fout) {
101 printf("Usage: ./program <input> <output>\n");
102 perror("open: ");
103 exit(1);
104 }
105
106 fstat(fileno(fin), &st);
107 max_size = st.st_size / 100;
108
109 list = malloc(max_size * sizeof(*list));
110 buf = malloc(BUF_SIZE);
111 if (!list || !buf) {
112 printf("Out of memory\n");
113 exit(1);
114 }
115
116 for ( ; ; ) {
117 ret = read_block(buf, BUF_SIZE, fin);
118 if (ret < 0)
119 break;
120
121 add_list(buf, ret);
122 }
123
124 printf("loaded %d\n", list_size);
125
126 printf("sorting ....\n");
127
128 qsort(list, list_size, sizeof(list[0]), compare_txt);
129
130 list2 = malloc(sizeof(*list) * list_size);
131
132 printf("culling\n");
133
134 for (i = count = 0; i < list_size; i++) {
135 if (count == 0 ||
136 strcmp(list2[count-1].txt, list[i].txt) != 0) {
137 list2[count++] = list[i];
138 } else {
139 list2[count-1].num += list[i].num;
140 }
141 }
142
143 qsort(list2, count, sizeof(list[0]), compare_num);
144
145 for (i = 0; i < count; i++)
146 fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
147
148 return 0;
149}
150