Transcript Cryptograms

Cryptograms
A Tour of Code
Initial stuff
package cryptograms
import java.io.File
import Code._
object Cryptograms {
/** A word, either read from the word file or extracted from the coded message. */
type Word = String
/** The letter pattern of a word, e.g. "ABCADB" for "people". */
type Pattern = String
/** Maps a pattern to the list of words having that pattern. */
type PatternMap = Map[String, List[Word]]
/** The 26 uppercase letters A-Z, in order. */
val alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
3
readWordFile
/**
 * Reads "unranked-words.txt", a file with one word per line, and
 * returns its lines as a list of UPPERCASED words, in file order.
 *
 * The underlying source is closed even if reading fails part-way.
 */
def readWordFile: List[Word] = {
  val file = "unranked-words.txt"
  val stream = scala.io.Source.fromFile(file)
  try {
    // getLines is lazy: force it with toList before the source is closed.
    stream.getLines().toList map (_.toUpperCase)
  } finally {
    stream.close()
  }
}
4
makePatternMap
/**
 * Groups words by their letter pattern.
 *
 * @param words the words to index
 * @return a map from each pattern (as computed by patternOf) to the
 *         list of words that have that pattern
 */
def makePatternMap(words: List[Word]): PatternMap =
  words.groupBy(patternOf)
5
patternOf
/**
 * Computes the letter pattern of a word: the first distinct letter
 * becomes 'A', the second 'B', and so on, with repeated letters
 * reusing the label they were first given.
 * For example, "people" -> "ABCADB".
 *
 * Characters that are not letters are copied into the pattern as-is
 * and do not consume a label.
 */
def patternOf(word: Word): Pattern = {
  // Accumulator: (pattern built so far, label already given to each letter).
  val (pattern, _) = word.foldLeft(("", Map.empty[Char, Char])) {
    case ((soFar, labels), ch) if !ch.isLetter =>
      (soFar + ch, labels)
    case ((soFar, labels), ch) =>
      labels.get(ch) match {
        case Some(label) => (soFar + label, labels)
        case None =>
          // The n-th new letter (counting from 0) is labelled 'A' + n.
          val label = ('A' + labels.size).toChar
          (soFar + label, labels + (ch -> label))
      }
  }
  pattern
}
6
patternOf #2
/**
 * Returns the pattern of a word, computed with a tail-recursive helper
 * instead of a mutable loop. For example, "people" -> "ABCADB".
 *
 * Non-letters (such as the apostrophe that extractWords keeps inside
 * words) are copied into the pattern unchanged and use up no pattern
 * letter, so the result agrees with patternOf.
 *
 * Pattern letters are drawn from `alphabet`, so a word with more
 * distinct letters than `alphabet` has characters makes this fail.
 */
def patternOf2(word: Word): Pattern = {
  // word:  the part of the word still to be processed
  // alpha: pattern letters not yet handed out
  // map:   pattern letter already assigned to each letter seen
  // pat:   the pattern built so far
  @scala.annotation.tailrec
  def pat2(word: Word,
           alpha: String,
           map: Map[Char, Char],
           pat: Pattern): Pattern =
    if (word.isEmpty) pat
    else if (!word.head.isLetter) {
      pat2(word.tail, alpha, map, pat + word.head)
    }
    else if (map contains word.head) {
      pat2(word.tail, alpha, map, pat + map(word.head))
    }
    else {
      pat2(word.tail, alpha.tail, map +
        (word.head -> alpha.head), pat + alpha.head)
    }
  pat2(word, alphabet, Map[Char, Char](), "")
}
7
patternOf #3
/**
 * Returns the pattern of a word in two passes: first build the map
 * from each letter to its pattern letter, then translate the word
 * through that map. For example, "people" -> "ABCADB".
 *
 * Non-letters (such as the apostrophe that extractWords keeps inside
 * words) get no entry in the map and are copied into the pattern
 * unchanged, so the result agrees with patternOf.
 *
 * Pattern letters are drawn from `alphabet`, so a word with more
 * distinct letters than `alphabet` has characters makes this fail.
 */
def patternOf3(word: Word): Pattern = {
  // word:  the part of the word still to be processed
  // alpha: pattern letters not yet handed out
  // map:   pattern letter already assigned to each letter seen
  @scala.annotation.tailrec
  def buildMap(word: Word,
               alpha: String,
               map: Map[Char, Char]): Map[Char, Char] = {
    if (word.isEmpty) map
    else if (!word.head.isLetter || (map contains word.head)) {
      buildMap(word.tail, alpha, map)
    }
    else buildMap(word.tail, alpha.tail,
      map + (word.head -> alpha.head))
  }
  val map = buildMap(word, alphabet, Map[Char, Char]())
  // Characters without an entry are the non-letters; keep them as they are.
  for (ch <- word) yield map.getOrElse(ch, ch)
}
8
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
9
extractWords
/**
 * Returns the set of distinct words, uppercased, from a given message.
 *
 * A word is a maximal run of the letters A-Z and apostrophes; every
 * other character acts as a separator.
 *
 * String.split produces an empty string when the message is empty or
 * begins with a separator (for example an opening quotation mark);
 * such empty strings are dropped so they are never treated as words.
 */
def extractWords(message: String): Set[Word] = {
  message.toUpperCase.trim.split("[^A-Z']+").filter(_.nonEmpty).toSet
}
10
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
11
getSetOfPatterns
/**
 * Maps every word to its pattern and collects the distinct patterns.
 *
 * @param words the words whose patterns are wanted
 * @return the set of patterns (as computed by patternOf) of those words
 */
def getSetOfPatterns(words: Set[Word]): Set[Pattern] =
  words.map(patternOf)
12
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
13
makeCustomizedPatternMap
/**
 * Restricts the full pattern map to the patterns found in the message.
 *
 * @param patterns the patterns to keep
 * @param bigMap   the map from every known pattern to its words
 * @return the entries of bigMap whose key is one of the given patterns
 */
def makeCustomizedPatternMap(patterns: Set[Pattern],
                             bigMap: PatternMap) =
  bigMap.filter { case (pattern, _) => patterns(pattern) }
14
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
15
sortPatternsByFrequency
/**
 * Lists the patterns of a pattern map in ascending order of how many
 * words fit each pattern (fewest first).
 *
 * @param map the pattern map whose keys are to be ordered
 * @return the map's patterns, sorted by the length of their word lists
 */
def sortPatternsByFrequency(map: PatternMap) =
  map.toList
    .sortBy { case (_, words) => words.length }
    .map { case (pattern, _) => pattern }
16
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
17
sortCodeWordsByPatternFrequency
/**
 * Orders the code words to follow the order of the pattern list
 * (which the caller has sorted least-to-most frequent).
 *
 * Code words whose pattern is not in patternList are left out. Code
 * words sharing a pattern keep their relative order from codeWords.
 *
 * @param codeWords   the code words to order
 * @param patternList the patterns, in the desired order
 * @return the code words, arranged pattern by pattern
 */
def sortCodeWordsByPatternFrequency(codeWords: List[Word],
                                    patternList: List[Pattern]) = {
  // Compute each code word's pattern once, instead of once for every
  // (pattern, code word) pair.
  val wordsByPattern = codeWords.groupBy(patternOf)
  patternList flatMap (pattern => wordsByPattern.getOrElse(pattern, Nil))
}
18
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
19
findBestCode, I
/**
 * Recursive search to find the best code for a message.
 *
 * The first remaining code word is tried against every real word that
 * has the same pattern and is still possible under the code so far;
 * each such pairing extends the code and the search continues with the
 * remaining code words. A code word that cannot be matched is skipped.
 *
 * @param codeWords All remaining code words, rarest pattern first.
 * @param smallMap  A map of patterns in the cryptogram to possible real words.
 * @param code      The code to be extended into a more complete code.
 * @return The code with the highest wordsMatched among those explored;
 *         `code` itself when no extension beats it.
 */
def findBestCode(codeWords: List[Word],
                 smallMap: PatternMap,
                 code: Code): Code = {
  if (codeWords.isEmpty || smallMap.isEmpty) code // bottom of recursion / out of things to try
  else {
    val codeWord = codeWords.head
    val pattern = patternOf(codeWord)
    smallMap.get(pattern) match {
      case None => // can't match codeWord
        println(s"Can't match $pattern")
        findBestCode(codeWords.tail, smallMap, code)
      // --- slide 20: findBestCode, II ---
      case Some(realWordsWithSamePattern) =>
        val wordsMatchingCodeWord = shrinkList(codeWord, code,
          realWordsWithSamePattern)
        if (wordsMatchingCodeWord.isEmpty) {
          findBestCode(codeWords.tail, smallMap, code)
        }
        else {
          // Every candidate extends the incoming `code`, not the best so far;
          // a candidate replaces the best only when strictly better, so the
          // earliest candidate wins ties.
          wordsMatchingCodeWord.foldLeft(code) { (bestCode, realWord) =>
            val nextCode = findBestCode(codeWords.tail, smallMap,
              code.extend(realWord, codeWord))
            if (nextCode.wordsMatched > bestCode.wordsMatched) nextCode
            else bestCode
          }
        }
    }
  }
}
21
shrinkList and code.extend
/**
 * Narrows a list of candidate words down to those that isPossibleWord
 * accepts for the given code word under the given code (which is
 * taken to be correct).
 */
def shrinkList(codeWord: Word, code: Code, words: List[Word]) =
  for (candidate <- words if isPossibleWord(codeWord, candidate, code))
    yield candidate
/**
 * Builds a new code from this one by pairing each character of the
 * plain word with the character at the same position in the code word
 * and adding those pairs to this code's encoder.
 * The new code's wordsMatched is one more than this code's.
 */
def extend(plain: String, code: String): Code = {
  val letterPairs = plain.zip(code).toMap
  val grown = new Code(encoder ++ letterPairs)
  grown.wordsMatched = wordsMatched + 1
  grown
}
22
discoverCode
/**
 * Makes a best-effort guess at the code used to produce a cryptogram.
 *
 * Steps: index the words from the word file by pattern, keep only the
 * patterns that occur in the message, order the message's words from
 * the pattern with the fewest matching words to the one with the most,
 * and hand that ordering to findBestCode.
 *
 * @param codedMessage the text of the cryptogram
 * @return the result of encoding `alphabet` with the best code found
 */
def discoverCode(codedMessage: String): String = {
  val dictionaryByPattern = makePatternMap(readWordFile)
  val cipherWords = extractWords(codedMessage)
  val candidatesByPattern =
    makeCustomizedPatternMap(getSetOfPatterns(cipherWords), dictionaryByPattern)
  val orderedCipherWords = sortCodeWordsByPatternFrequency(
    cipherWords.toList, sortPatternsByFrequency(candidatesByPattern))
  findBestCode(orderedCipherWords, candidatesByPattern, new Code).encode(alphabet)
}
23
Code.encode
/**
 * Translates a plaintext word character by character through the
 * encoder; a character the encoder has no entry for becomes '_'.
 */
def encode(plainWord: String) =
  plainWord map (ch => encoder.getOrElse(ch, '_'))
24
Sample results
Good:
Original message:
I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE
LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
Cryptogram:
U BXA'C VAXI QHJZ XZ DXS QHJZ HM IYJJ HM U MQXSJB JUVY; HAB U JUVY
JYMM CQHA QHJZ XZ DXS QHJZ HM IYJJ HM DXS BYMYPNY.
Decoded by the program:
I DON'T KNOW HALF OF YOU HALF AS WELL AS I SHOULD LIKE; AND I LIKE
LESS THAN HALF OF YOU HALF AS WELL AS YOU DESERVE.
17 characters correctly decoded.
Medium:
Original message:
YOU CAN FOOL ALL OF THE PEOPLE SOME OF THE TIME, AND SOME OF THE
PEOPLE ALL THE TIME, BUT YOU CANNOT FOOL ALL THE PEOPLE
Cryptogram:
TMN PZY IMMU ZUU MI HOL WLMWUL KMJL MI HOL HEJL, ZYA KMJL MI HOL
WLMWUL ZUU HOL HEJL, GNH TMN PZYYMH IMMU ZUU HOL WLMWUL
Decoded by the program:
YOU NAR COOL ALL OC WOE PEOPLE SOME OC WOE WEME, ART SOME OC WOE
PEOPLE ALL WOE WEME, GUW YOU NARROW COOL ALL WOE PEOPLE
9 characters correctly decoded.
25
The End
26