# -*- coding: utf-8 -*-
# $Id: PlagVisualizer.py 1241 2009-09-23 20:28:13Z amelung $
# Copyright (c) 2006 Otto-von-Guericke-Universität Magdeburg
# This file is part of ECAssignmentBox.
# Author: Christian Dervaric
# This module contains several methods to visualize similarities or the
# PlagResults.
from Visualize import *
from PIL import Image
class PlagVisualizer(object):
    """Contains several methods to visualize similarities or the PlagResults.

    Every method is a thin wrapper that forwards its arguments to one of the
    helper modules ``htmlMaker``, ``dotplot``, ``torc``, ``heatmap``,
    ``patterngram`` or ``statistics``. Those names must be available in the
    module namespace (presumably through the module's star import -- verify).
    """

    def __init__(self):
        # NOTE(review): the original body is not visible in the reviewed
        # source; the class keeps no state, so nothing is initialised here.
        pass

    def resultToHtml(self, result, s1, s2):
        """Mark the tiles of a PlagResult in the two compared strings.

        result - the PlagResult object
        s1, s2 - the two strings belonging to the result

        Return: list of strings [s1Marked, s2Marked] in which the tiles of
                the result are marked in HTML.
        """
        return htmlMaker.resultToHtml(result, s1, s2)

    def resultToHtmlText(self, result, s1, s2):
        """Build the text of an HTML file for manually checking two texts.

        result - the PlagResult object
        s1, s2 - the two strings belonging to the result

        Return: string containing the html file text that can be used to
                manually check the two texts for plagiarism, including
                highlighted passages which are linked to each other.
        """
        return htmlMaker.resultToHtmlText(result, s1, s2)

    # DOTPLOT Methods

    def resultToDotplot(self, result, showIdNums=False):
        """Create a dotplot using the tiles of the given PlagResult object.

        result     - the PlagResult object
        showIdNums - if set to True shows numbers along the dotplot marking
                     the ids

        Return: PIL Image object
        """
        return dotplot.createDotplotFromResult(result, showIdNums)

    def stringsToDotplot(self, string1, string2, w=1, k=1, id1="", id2="",
                         showIdNums=False):
        """Create a dotplot of all w/k-matches between two strings.

        A dotplot shows all w/k-matches (window of length w with k matches)
        between two sequences - here the two strings. For every window of
        length w with at least k matches a dot is drawn, otherwise it will be
        left blank. Values w and k should be at least w=k=1 and w>=k.

        Other Options:
        showIdNums - if set to True shows numbers along the dotplot marking
                     the ids
        id1, id2   - ids for the strings used for marking purposes in the
                     image

        Return: PIL Image object
        """
        return dotplot.createDotplotFromStrings(string1, string2, w, k,
                                                id1, id2, showIdNums)

    def stringListToDotplot(self, stringList, w=1, k=1, idList=None,
                            showIdNums=True):
        """Create a dotplot of all w/k-matches for a list of strings.

        A dotplot shows all w/k-matches (window of length w with k matches)
        between sequences - here the strings in stringList. For every window
        of length w with at least k matches a dot is drawn, otherwise it will
        be left blank. Values w and k should be at least w=k=1 and w>=k.

        Other Options:
        showIdNums - if set to True shows numbers along the dotplot marking
                     the ids
        idList     - ids for the strings used for marking purposes in the
                     image (presumably parallel to stringList -- verify
                     against dotplot.createDotplotFromStringList); defaults
                     to an empty list

        Return: PIL Image object
        """
        # A fresh list per call: a shared mutable default would leak any
        # modification made by the callee into later calls.
        if idList is None:
            idList = []
        return dotplot.createDotplotFromStringList(stringList, w, k, idList,
                                                   showIdNums)

    # TORC Method

    def resultsToTorc(self, resultList, colored=False):
        """Create a Torc showing the similarity relations of compared texts.

        A Torc is a kind of overview which allows the user to recognize the
        similarity relations between different texts. Therefore all texts are
        arranged on a circle. For each relation of similarity between two
        texts a connecting line is drawn.

        resultList - the results whose compared texts are shown
        colored    - passed through unchanged to torc.resultsToTorc

        Return: PIL Image object
        """
        return torc.resultsToTorc(resultList, colored)

    # Heatmap Methods

    def resultsToIntensityHeatmap(self, resultList, scopes=None, recSize=20,
                                  onlyPositiveResults=True):
        """Create an intensity heatmap chart of the results.

        Colored rectangles indicate the grade of plagiarism.

        scopes - a list of numbers defining scopes in the following way:
                 e.g. [0.2, 0.4, 0.6, 0.8, 1] (default) would define the
                 scopes 0<=x<=0.2, 0.2<x<=0.4, ... ,0.8<x<=1
                 Each scope gets its own color in the heatmap.
        recSize - int value that expresses the size in pixel of a single
                  heatmap rectangle
        onlyPositiveResults - If set to True only positive results will be
                              used to create the heatmap. If no positive
                              result is found None will be returned.

        Return: PIL Image object
        """
        # Build the default list per call instead of sharing one mutable
        # default object between all calls.
        if scopes is None:
            scopes = [0.2, 0.4, 0.6, 0.8, 1]
        # NOTE(review): the last keyword argument was cut off in the reviewed
        # source; passing onlyPositiveResults through matches the signature
        # and the documented contract -- confirm against heatmap.
        return heatmap.createIntensityHeatmap(
            resultList,
            scopes=scopes,
            recSize=recSize,
            onlyPositiveResults=onlyPositiveResults)

    def resultsToClusterHeatmap(self, resultList, recSize=20,
                                onlyPositiveResults=True):
        """Create a cluster heatmap chart of the results.

        Shows all clusters of plagiarized results in a chart; colored
        rectangles indicate the grade of plagiarism.

        recSize - int value that expresses the size in pixel of a single
                  heatmap rectangle
        onlyPositiveResults - If set to True only positive results will be
                              used to create the heatmap. If no positive
                              result is found None will be returned.

        Return: PIL Image object
        """
        # NOTE(review): the last keyword argument was cut off in the reviewed
        # source; passing onlyPositiveResults through matches the signature
        # and the documented contract -- confirm against heatmap.
        return heatmap.createClusterHeatmap(
            resultList,
            recSize=recSize,
            onlyPositiveResults=onlyPositiveResults)

    # Patterngrams

    def stringsToCategoricalPatterngram(self, basefile, ensemble, klength,
                                        normName, mode=1):
        """Create a Categorical Patterngram as PIL Image object.

        It "shows the degree of similarity that exists between one file
        [(the base file)] and the rest of the ensemble of files." (Rib00)

        basefile - the single file in the 'one-to-many' comparison
        ensemble - the set of files used for the comparison
        klength  - the length of the used ngrams
        normName - the name of the normalizer used to normalize the texts
        mode     - passed through unchanged to
                   patterngram.createCategoricalPatterngram

        "[A] categorical patterngram [...] [is] a visualization that
        displays, for each character position x in the base file, the number
        of ensemble files that contain the k-length n-gram that begins at
        that position. A point is plotted at coordinate (x,y) if the k-length
        n-gram beginning at character x in the base file occurs at any
        location in y files of the ensemble." (Rib00)

        Rib00 - "Using Visualization to Detect Plagiarism in Computer Science
                Classes", Randy L. Ribler, Marc Abrams, 2000
        """
        return patterngram.createCategoricalPatterngram(
            basefile, ensemble, klength, normName, mode)

    def stringsToCompositeCategoricalPatterngram(self, basefile, ensemble,
                                                 klength, normName, mode=1):
        """Create a Composite Categorical Patterngram as PIL Image object.

        It "shows which particular files are similar." (Rib00)

        basefile - the single file in the 'one-to-many' comparison
        ensemble - the set of files used for the comparison
        klength  - the length of the used ngrams
        normName - the name of the normalizer used to normalize the texts
        mode     - passed through unchanged to
                   patterngram.createCompositeCategoricalPatterngram

        A composite categorical patterngram is a visualization that displays,
        for each character position x in the base file, which files of the
        ensemble also contain the k-length n-gram that begins at that
        position. "A point is plotted at (x,y) if the k-length n-gram
        beginning at x in the base file occurs one or more times in file y."
        (Rib00)

        Rib00 - "Using Visualization to Detect Plagiarism in Computer Science
                Classes", Randy L. Ribler, Marc Abrams, 2000
        """
        return patterngram.createCompositeCategoricalPatterngram(
            basefile, ensemble, klength, normName, mode)

    # Statistic Methods

    def resultsToStatistic(self, resultList, statName=None):
        """Return the overview statistic for the results as PIL image object.

        TODO: For now only one overview statistic is available, so statName
        is currently ignored.

        The overview shows the total number of texts, the number of possible
        plagiarism texts and the number of plagiarism clusters.

        Return: PIL Image object, or None if the statistics support is not
                available (NameError or ImportError).
        """
        try:
            return statistics.statOverview2(resultList)
        # A tuple is required to catch both types; the former
        # "except NameError, ImportError" caught only NameError and bound the
        # caught exception to the name ImportError.
        except (NameError, ImportError):
            return None

    def writeImageToFile(self, img, filename):
        """Write the given img to the specified place on the hard disk.

        img      - the image object to write
        filename - path + file name of the target file

        The format will be taken from the file extension. Formats available
        are amongst others: png, jpeg, bmp
        For detailed information please check out the Python Imaging Library
        -> http://www.pythonware.com/library/pil/handbook/index.htm

        An IOError while writing is reported on stdout and not re-raised.
        """
        try:
            # NOTE(review): the statement guarded by the handler was cut off
            # in the reviewed source; img.save(filename) is the PIL call that
            # matches the documented behaviour -- confirm.
            img.save(filename)
        except IOError as err:
            # Report the caught instance; the class attribute IOError.message
            # is not a string and made the handler itself fail.
            print("Writing img to disk failed.\n Error Message:\n" + str(err))