/*
 Copyright (c) 2003-2006 Niels Kokholm and Peter Sestoft
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
*/

// C5 example: Find and print the most common words in a text file.
// Programming pearl by D.E. Knuth in CACM 29 (June 1986) 471-483.

// Compile with
//   csc /r:C5.dll Commonwords.cs

using System;                           // Console
using System.Globalization;             // CultureInfo, NumberStyles
using System.IO;                        // StreamReader, TextReader
using System.Text.RegularExpressions;   // Regex
using C5;                               // IDictionary, TreeDictionary, TreeSet
using SCG = System.Collections.Generic; // IComparer

namespace Commonwords {
  /// <summary>
  /// Console example that counts the words of a text file and prints the
  /// most frequent ones, most common first.
  /// </summary>
  class Commonwords {
    /// <summary>
    /// Program entry point. Expects exactly two arguments:
    /// <c>&lt;maxwords&gt;</c> (an integer) and <c>&lt;filename&gt;</c>.
    /// Prints the usage line when the argument count is wrong or when
    /// <c>&lt;maxwords&gt;</c> is not a valid integer.
    /// </summary>
    /// <param name="args">Command-line arguments.</param>
    static void Main(String[] args) {
      int maxWords;
      // TryParse instead of Parse: a malformed or out-of-range count is a
      // usage error, not a reason to crash with an unhandled exception.
      if (args.Length != 2
          || !int.TryParse(args[0], NumberStyles.Integer,
                           CultureInfo.InvariantCulture, out maxWords)) {
        Console.WriteLine("Usage: Commonwords <maxwords> <filename>\n");
        return;
      }
      PrintMostCommon(maxWords, args[1]);
    }

    /// <summary>
    /// Reads <paramref name="filename"/> line by line, splits each line into
    /// words at runs of characters other than ASCII letters and digits,
    /// counts the occurrences of each word (case-sensitively), and writes at
    /// most <paramref name="maxWords"/> lines of the form
    /// "count occurrences of word" to the console, ordered by
    /// <see cref="FreqOrder"/>.
    /// </summary>
    /// <param name="maxWords">
    /// Upper bound on the number of words printed; a value of zero or less
    /// prints nothing.
    /// </param>
    /// <param name="filename">Path of the text file to analyse.</param>
    static void PrintMostCommon(int maxWords, String filename) {
      ICollection<String> wordbag = new HashBag<String>();
      Regex delim = new Regex("[^a-zA-Z0-9]+");
      using (TextReader rd = new StreamReader(filename)) {
        for (String line = rd.ReadLine(); line != null; line = rd.ReadLine()) {
          foreach (String s in delim.Split(line)) {
            // Regex.Split yields empty strings when a line is empty or
            // starts/ends with a delimiter; those are not words.
            if (s != "") {
              wordbag.Add(s);
            }
          }
        }
      }
      // One (word, count) pair per distinct word in the bag.
      KeyValuePair<String,int>[] frequency
        = wordbag.ItemMultiplicities().ToArray();
      Sorting.IntroSort(frequency, 0, frequency.Length, new FreqOrder());
      int stop = Math.Min(frequency.Length, maxWords);
      for (int i = 0; i < stop; i++) {
        KeyValuePair<String,int> p = frequency[i];
        Console.WriteLine("{0,4} occurrences of {1}", p.Value, p.Key);
      }
    }

    /// <summary>
    /// Lexicographic ordering on (word, count) pairs: decreasing count
    /// first, then increasing word.
    /// </summary>
    /// <remarks>
    /// Words are compared with <c>String.CompareTo</c>, which is
    /// culture-sensitive, so the relative order of equally frequent words
    /// can depend on the current culture.
    /// </remarks>
    class FreqOrder : SCG.IComparer<KeyValuePair<String,int>> {
      /// <summary>
      /// Compares two (word, count) pairs.
      /// </summary>
      /// <param name="p1">First pair.</param>
      /// <param name="p2">Second pair.</param>
      /// <returns>
      /// A negative value when <paramref name="p1"/> sorts before
      /// <paramref name="p2"/>, a positive value when it sorts after, and
      /// zero when both count and word compare equal.
      /// </returns>
      public int Compare(KeyValuePair<String,int> p1,
                         KeyValuePair<String,int> p2) {
        // Operands swapped on purpose: higher counts must come first.
        int major = p2.Value.CompareTo(p1.Value);
        return major != 0 ? major : p1.Key.CompareTo(p2.Key);
      }
    }
  }
}