## Libraries to load
library(bitops)
library(gdata)
library(caTools)
library(tools)
library(gplots)
library(timeSeries)
library(timeDate)
library(fBasics)
library(igraph)
library(tcltk )
library(ade4)
library(Cairo)



# Clean-up of the reconstructed word table.
# The selection of words is driven by a flag column stored in the CSV.
setwd('C:\\Users\\alaure\\Desktop\\NICO\\Version octobre\\Datas')
C <- read.csv("REST.csv", sep = ",", header = TRUE)

# Keep only the rows whose X.1 flag equals 1 (rows with a missing flag are
# dropped as well), then remove the flag column itself.
flagged <- !is.na(C[["X.1"]]) & C[["X.1"]] == 1
C <- C[flagged, names(C) != "X.1", drop = FALSE]

# Column X (the words) is set aside as `mots` and removed from the table,
# so that only the per-artist columns remain.
mots <- C[["X"]]
C <- C[, names(C) != "X", drop = FALSE]
vecteurArtistes <- colnames(C)

## Second clean-up pass: discard the word rows that too few artists use
di <- dim(C)

# Per row, number of zero cells found in columns 2..di[2].
# NOTE(review): column 1 is not part of this count -- confirm whether the
# first artist column should be included.
zero_cells <- rowSums(C[, 2:di[2], drop = FALSE] == 0)

# garder: 1 = keep the row, 0 = drop it. A row is dropped when more than
# di[2] - 9 of the counted cells are zero.
garder <- rep(1, di[1])
garder[zero_cells > (di[2] - 9)] <- 0

E <- C[garder == 1, , drop = FALSE]
mots2 <- mots[garder == 1]
di <- dim(E)
# Number of artists using each kept word: for every row of E, count the
# columns holding a non-zero value. Every column of E is counted, including
# the first one, so that no artist is left out of the tally.
# The result is a plain (unnamed) numeric vector, one entry per row of E.
NBArtist <- as.vector(rowSums(E != 0))
  
  

## Pie chart: share of the grand total taken by each kept word
# u is a one-column matrix holding the row totals of E.
u <- as.matrix(rowSums(E))
pie.u <- u / sum(u)
names(pie.u) <- mots2

par(mai = c(.3, 0.3, .3, .3))
slice_colours <- c(
  "#00FF00", "#FF0080", "#4B0082", "#FFD700", "#4682B4", "#FFEEEE",
  "#BC81FF", "#F0E68C", "#DC143C", "#FFFF00", "#FF0000", "#00BFFF",
  "#DA70D6", "#40E0D0", "#FF1493", "#FFFFFF", "#6A5ACD"
)
pie(pie.u, main = "Piechart", col = slice_colours)


 ##### Bar plots
# u gets two columns: NBArtist first, then the row totals of E.
u <- cbind(NBArtist, as.matrix(rowSums(E)))
rownames(u) <- mots2
colnames(u) <- c("Number of artist using each word", "Number of occurence of each word")

# Wide left margin so the horizontal word labels fit.
par(mai = c(.5, 2, .4, .5))

# One horizontal bar per word: row totals first, then NBArtist.
barplot(
  height = u[, 2], names.arg = mots2,
  horiz = TRUE, beside = TRUE, las = 1, ylab = "",
  main = "Nombre de mots par artistes"
)
barplot(
  height = u[, 1], names.arg = mots2,
  horiz = TRUE, beside = TRUE, las = 1, ylab = "",
  main = "Nombre d'artistes qui utilisent un mot"
)

# Both series side by side (gplots::barplot2), with the column names as legend.
barplot2(
  height = t(u), names.arg = mots2,
  horiz = TRUE, beside = TRUE, las = 1, ylab = "",
  legend.text = colnames(u)
)


## Association plot of words against artists (currently disabled)
#rownames(E)<-mots2
#assoc(E[1:5,1:5])

## Two-way view: a few words crossed with a few artists
# Size of the table and per-column totals (shown when run interactively).
dim(E)
colSums(E)

mosaic_words <- c("work", "Woman-women", "video", "politics", "feminist")
mosaic_artists <- c(
  "Anne.Mie.Van.Kerckhoven", "Jo.Spence", "Ewa.Partum",
  "Girls.Who.Like.Porno", "Maria.Ruido"
)

# Rows are picked through mots2, columns through the artist names.
Mosa <- E[mots2 %in% mosaic_words, mosaic_artists, drop = FALSE]
mosaicplot(Mosa, main = "", shade = TRUE)

 # Scatter plots: how the artists use two given words (one point per artist)

# Counts of one word across all columns of E, returned as a column matrix
# (one row per artist). Rows of E are matched to words through mots2.
counts_for <- function(word) {
  t(E[which(mots2 == word), , drop = FALSE])
}

work_counts <- counts_for("work")
women_counts <- counts_for("Woman-women")
politics_counts <- counts_for("politics")
power_counts <- counts_for("power")

par(mai = c(1, 1, .4, .5))

# "work" against "Woman-women", each point labelled with its artist column.
plot(work_counts, women_counts,
     xlab = "Work", ylab = "Woman-Women", ylim = c(0, 75), xlim = c(0, 150))
text(work_counts, women_counts, labels = colnames(E), pos = 4, cex = 0.8)
cor(work_counts, women_counts)

# "work" against "politics".
plot(work_counts, politics_counts,
     xlab = "Work", ylab = "politics", ylim = c(0, 75), xlim = c(0, 150))
cor(work_counts, politics_counts)

# "Woman-women" against "power": the axis labels name the words that are
# actually plotted.
cor(women_counts, power_counts)
plot(women_counts, power_counts,
     xlab = "Woman-Women", ylab = "power", ylim = c(0, 75), xlim = c(0, 150))

###### Work with shares instead of raw counts ######
# Every row of E is divided by its own total, so each row of Eprop2 sums to 1
# (a row whose total is 0 becomes NaN).
a <- dim(E)
totper <- as.vector(rowSums(E))

# Dividing a data frame by a vector of length nrow applies the division row
# by row; the column names of E carry over.
Eprop2 <- E / totper
rownames(Eprop2) <- NULL
## Three-variable bubble plots: two words on the axes, a third word as the
## circle size, all taken from Eprop2

# Values of one word across all columns of Eprop2, as a column matrix
# (one row per artist). Rows of Eprop2 are matched to words through mots2.
share_of <- function(word) {
  t(Eprop2[which(mots2 == word), , drop = FALSE])
}

work_share <- share_of("work")
women_share <- share_of("Woman-women")
politics_share <- share_of("politics")
gender_share <- share_of("gender")
feminist_share <- share_of("feminist")

# "work" against "Woman-women", circle size = "feminist".
symbols(x = work_share, y = women_share, circles = feminist_share,
        inches = .3, xlab = "Work", ylab = "Woman-Women", bg = 1:10, fg = "gray30",
        main = "Circles Plot - Work, Woman-Women, and size = feminist")
text(work_share, women_share, labels = colnames(Eprop2), pos = 4, cex = 0.8)

# "politics" against "gender", circle size = "feminist". The axis labels name
# the words that are actually plotted.
symbols(x = politics_share, y = gender_share, circles = feminist_share,
        inches = .3, xlab = "Politics", ylab = "Gender", bg = 1:10, fg = "gray30",
        main = "Circles Plot - Politics, gender, and size = feminist")
text(politics_share, gender_share, labels = colnames(Eprop2), pos = 4, cex = 0.8)

# Same plot restricted to the [0, 0.1] x [0, 0.1] corner.
symbols(x = politics_share, y = gender_share, circles = feminist_share,
        xlim = c(0, .1), ylim = c(0, .1),
        inches = .3, xlab = "Politics", ylab = "Gender", bg = 1:10, fg = "gray30",
        main = "Circles Plot - Politics, gender, and size = feminist")
text(politics_share, gender_share, labels = colnames(Eprop2), pos = 4, cex = 0.8)


   ##################### Explain the PCA here ######################

## Start again from the CSV, keeping the words flagged for selection
C <- read.csv("REST.csv", sep = ",", header = TRUE)

# Rows whose X.1 flag equals 1 (a missing flag counts as "not selected").
flagged <- !is.na(C[["X.1"]]) & C[["X.1"]] == 1
mots <- C[["X"]][flagged]
C <- C[flagged, !(names(C) %in% c("X", "X.1")), drop = FALSE]
vecteurArtistes <- colnames(C)

## Again, discard the word rows that too few artists use
di <- dim(C)

# Per row, number of zero cells found in columns 2..di[2].
# NOTE(review): column 1 is not part of this count -- confirm whether the
# first artist column should be included.
zero_cells <- rowSums(C[, 2:di[2], drop = FALSE] == 0)

# garder: '' = keep the row, 'NON' = drop it. A row is dropped when more than
# di[2] - 9 of the counted cells are zero.
garder <- rep('', di[1])
garder[zero_cells > (di[2] - 9)] <- 'NON'

E <- C[garder == '', , drop = FALSE]
mots <- mots[garder == '']
di <- dim(E)

## Second step: from here on the table is transposed (one row per artist)
## and artists with a small overall total are left out
dC <- dim(E)
pc <- t(E)

# Total of each row of pc, i.e. of each column of E.
tot <- as.vector(rowSums(pc))

# Rows with a total below 5 are excluded; "out " is printed once per
# excluded row.
kept <- !(tot < 5)
cat(rep("out ", sum(!kept)), sep = "")

# pc1 starts with an all-zero placeholder row labelled "blank", followed by
# the rows that were kept, labelled with the matching artist names.
pc1 <- rbind(rep(0, dC[1]), pc[kept, , drop = FALSE])
vectorshort <- c("blank", vecteurArtistes[kept])
rownames(pc1) <- vectorshort
d1 <- dim(pc1)

## Third step: put the remaining rows on a common scale by dividing each
## row of pc1 by its own total

a <- dim(pc1)
totper <- as.vector(rowSums(pc1))

# Matrix / vector of length nrow divides row i by totper[i]. A row whose
# total is 0 (such as an all-zero row) becomes NaN.
pc2 <- pc1 / totper
colnames(pc2) <- mots
rownames(pc2) <- rownames(pc1)



## And here is the PCA (ade4::dudi.pca), keeping 4 axes
dimpc2 <- dim(pc2)
# The first row of pc2 is left out of the analysis.
pc3 <- pc2[-1, ]
uD3 <- dudi.pca(pc3, scannf = FALSE, nf = 4)

# NOTE(review): "courrier" looks like a misspelling of a font family name --
# confirm which family was intended.
par(col = 'red', col.main = 'blue', font = 1, family = "courrier")

# Factor maps for axes 1-2, then axes 3-4.
for (plane in list(c(1, 2), c(3, 4))) {
  par(col = 'red')
  scatter(uD3, xax = plane[1], yax = plane[2],
          clab.row = 0.8, clab.col = 0.8, permute = TRUE)
}

# Share of the total carried by each eigenvalue.
z <- length(uD3$eig)
sut <- uD3$eig / sum(uD3$eig)
barplot(sut, names.arg = seq_len(z), main = "Eigen Values")



# Column coordinates of the PCA on the first two axes, each drawn on a single
# vertical line (x fixed at 1) with its label.
par(col = 'black')
axe1 <- uD3$co
constant_x <- rep(1, nrow(axe1))

# axea: first axis; axeb: second axis. Column 1 is the constant, column 2 the
# coordinate.
axea <- cbind(constant_x, axe1[, 1], deparse.level = 0)
axeb <- cbind(constant_x, axe1[, 2], deparse.level = 0)
rownames(axea) <- rownames(axe1)
rownames(axeb) <- rownames(axe1)

# The formulas are kept as they are because the default axis titles are
# derived from them.
plot(axea[, 2] ~ axea[, 1], ylim = c(-1, 1), pch = 19)
text(axea[, 2] ~ axea[, 1], labels = rownames(axea), pos = 4, pch = 19)

plot(axeb[, 2] ~ axeb[, 1], ylim = c(-1, 1), pch = 19)
text(axeb[, 2] ~ axeb[, 1], labels = rownames(axeb), pos = 4)







## A totally different way of looking at this data:
## a weighted graph built with igraph

# Cross-product of the columns of E: a square, symmetric matrix with one
# row/column per column of E.
word_counts <- as.matrix(E)
Eprime <- t(word_counts) %*% word_counts

# Undirected weighted graph; the diagonal is ignored.
# Note: the name F shadows R's built-in F (alias of FALSE) from here on.
F <- graph.adjacency(Eprime, mode = "undirected", weighted = TRUE, diag = FALSE)
names.Vertex <- rownames(Eprime)

plot.igraph(
  F,
  layout = layout.fruchterman.reingold,
  vertex.size = 10,
  vertex.label = names.Vertex
)
clusters(F)
            
# Same graph without three artist columns.
# NOTE(review): presumably these are the artists not linked to the main
# component reported by clusters(F) -- confirm.
EConnect <- subset(E, select = c(-Klub.Zwei, -Migrantas, -Elssie.Ansareo))

# Cross-product of the remaining columns. The matrix is referred to as
# EprimeConnect everywhere below (R names are case-sensitive).
connect_counts <- as.matrix(EConnect)
Eprimeconnect0 <- t(connect_counts) %*% connect_counts
EprimeConnect <- as.matrix(Eprimeconnect0)

Fconnect <- graph.adjacency(EprimeConnect, diag = FALSE, weighted = TRUE, mode = "undirected")
# Vertex labels shortened to their first 10 characters.
names.Vertex.Connect <- substring(rownames(EprimeConnect), 1, 10)

clusters(Fconnect)
plot.igraph(Fconnect, layout = layout.fruchterman.reingold, vertex.size = 15,
            vertex.label = names.Vertex.Connect)
plot.igraph(Fconnect, layout = layout.kamada.kawai, vertex.size = 30,
            vertex.label = names.Vertex.Connect)

## Eliminating some weak links to see the stronger ones
# Every weight below 200 is set to 0, which removes the edge from the graph.
EprimeconnectResum <- EprimeConnect
EprimeconnectResum[EprimeConnect < 200] <- 0

FconnectResum <- graph.adjacency(EprimeconnectResum, diag = FALSE, weighted = TRUE,
                                 mode = "undirected")
names.Vertex.Connect.resum <- substring(rownames(EprimeconnectResum), 1, 10)

plot.igraph(FconnectResum, layout = layout.fruchterman.reingold, vertex.size = 15,
            vertex.label = names.Vertex.Connect.resum)
clusters(FconnectResum)
plot.igraph(FconnectResum, layout = layout.kamada.kawai, vertex.size = 30,
            vertex.label = names.Vertex.Connect.resum)
                                

### Interactive Tk versions of the three graphs
tkplot(
  F,
  canvas.width = 600, canvas.height = 500,
  vertex.label = names.Vertex
)
tkplot(
  Fconnect,
  canvas.width = 600, canvas.height = 500,
  vertex.label = names.Vertex.Connect
)
tkplot(
  FconnectResum,
  canvas.width = 600, canvas.height = 500,
  vertex.label = names.Vertex.Connect.resum
)


### And while we are at it: PageRank of the vertices of the full graph F
pagerank <- page.rank(F)

# One-column data frame of the scores, rows labelled like Eprime.
pagerank2 <- as.data.frame(pagerank$vector)
rownames(pagerank2) <- rownames(Eprime)

# Wide left margin so the horizontal labels fit.
par(mai = c(.5, 2, .4, .5))
barplot(
  height = pagerank2[, 1],
  names.arg = rownames(Eprime),
  horiz = TRUE, beside = TRUE, las = 1, ylab = "",
  col = "#4682B4",
  main = "Page rank of the artists"
)