#!/usr/bin/perl -w
#
# For every top-level domain listed in $tlds, print the dictionary words
# whose final letters spell that domain (candidates for "domain hack"
# names such as delicio.us).
#
# Input : $tlds (domain list) and $dictionary (one word per line).
# Output: one section per domain on STDOUT, domains in sorted order.
# Dies  : if either input file cannot be opened.
use strict;
use warnings;

######################################################################
# Variables                                                          #
######################################################################

# File with the list of domains to process. The format of the file should be:
#   .xx Country name
#   .xy Some other country
#   .xz Yet another country
my $tlds = 'tlds.txt';

# Dictionary file. Words are assumed to be sorted alphabetically, one per line
my $dictionary = '/usr/share/dict/linux.words';

# Minimum length of word to consider (3 letters, 2 of which are from the domain)
my $min_length = 3;

# Words that start with a match to this regexp will be ignored
my $ignore_start_with = qr/-/;

# Words that match this regexp will be ignored
my $ignore_match = qr/_/;

######################################################################
# Code                                                               #
######################################################################

# Load TLDs into a hash of domain (without the leading dot) => country name.
my %domains = ();
open(my $tlds_fh, '<', $tlds)
    or die "Can't load domains from $tlds : $!\n";
while (my $line = <$tlds_fh>) {
    chomp($line);

    # Skip blank lines so they do not create an empty-domain entry.
    next unless $line =~ /\S/;

    my ($domain, $country) = split(/\s+/, $line, 2);
    $domain =~ s/^\.//;

    # A line with leading whitespace or a lone "." leaves no domain text.
    next if $domain eq '';

    # Tolerate lines that carry a domain but no country name.
    $domains{$domain} = defined $country ? $country : '';
}
close($tlds_fh);

# Load the dictionary once. The length and ignore filters do not depend on
# the domain, so applying them here avoids re-reading and re-filtering the
# whole file for every domain.
my @words = ();
open(my $dict_fh, '<', $dictionary)
    or die "Can't load dictionary from $dictionary : $!\n";
while (my $line = <$dict_fh>) {
    chomp($line);
    next if length($line) < $min_length;

    # $ignore_start_with is anchored to the start of the word;
    # $ignore_match may match anywhere in the word.
    next if $line =~ m/^${ignore_start_with}/;
    next if $line =~ m/${ignore_match}/;

    push @words, lc($line);
}
close($dict_fh);

# Search dictionary for words ending in each domain
foreach my $domain (sort keys %domains) {
    print "Domains for $domain ($domains{$domain}):\n";

    # Words were lower-cased above, so compare against a lower-cased domain.
    # \Q...\E makes the domain match literally even if it holds regex
    # metacharacters.
    my $suffix      = lc($domain);
    my $cached_word = '';

    foreach my $word (@words) {
        # Print matches only once (assuming dictionary is sorted)
        if (($word =~ m/\Q$suffix\E$/) && ($word ne $cached_word)) {
            print " $word\n";
            $cached_word = $word;
        }
    }

    print "\n";
}