diff options
| -rwxr-xr-x | parse_cgit | 75 | 
1 files changed, 56 insertions, 19 deletions
# parse_cgit (from line 12 of the script onwards): mirror the description and
# the README files of every repository listed on a cgit index page.
#
#   $1 - URL of the cgit index page
#   $2 - destination directory; output goes to $2/<proto>/<host+path>/<repo>/
#
# External tools used: torify, curl, xml2tsv, grep, awk, sed, cut, tail, mktemp.

# Not referenced in this section; kept in case the rest of the script uses it.
FIN="/dev/stdin"
URLBASE="$1"
DEST="$2"
SUBPATH="/plain"
# Kept as a plain string on purpose: it is expanded unquoted so that it splits
# into the command and its options.
CURL="torify curl -Ls "
READMES="README README.txt README.md readme readme.txt readme.md"

# Scratch file for the xml2tsv dump of the index page. mktemp avoids a
# predictable name in the current directory.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/parse_cgit.XXXXXX") || {
	echo "parse_cgit: cannot create temporary file" >&2
	exit 1
}

## func
## cleanup: remove the scratch file.
## Runs as the EXIT handler, so it must not call exit itself; that keeps the
## script's real exit status intact.
cleanup () {
	rm -f -- "$TMPFILE"
}

## func
## get_repos: set the global `repos` to one "link|name" word per repository
## row found in the cached index dump ($TMPFILE).
## Assumes xml2tsv emits path<TAB>title=...<TAB>href=...<TAB>text for the
## repository anchors -- TODO confirm against the xml2tsv version in use.
get_repos() {
	repos=$(grep -Ei "/html/body/div/div/table/tr/td/a[[:blank:]]+title=" "$TMPFILE" \
		| awk -F'\t' '{print $3,$4}' \
		| sed -E 's/href=//g;s/ /\|/')
}

## func
## get_descr: print a short description record for one repository to stdout.
##   $1 - label printed on the "reponame:" line
##   $2 - URL of the repository page
## The last two "class=sub" table cells of the page are printed after the two
## header lines; per the original author's note these are the description and
## the author.
get_descr() {
	reponame=$1
	url=$2
	echo "reponame: $reponame"
	echo "url: $url"
	## Get author and description
	$CURL "$url" | xml2tsv \
		| grep -Ei "/html/body/div/table/tr/td[[:blank:]]+class=sub" \
		| tail -n 2 | cut -f 3-
}

## func
## get_readmes: try to download every candidate README name.
##   $1 - base URL of the repository's plain-file view
##   $2 - directory the files are written to
## A file is discarded when the transfer fails or when the response is cgit's
## "Not found" error page.
get_readmes() {
	LINK=$1
	DESTDIR=$2
	for f in $READMES; do
		printf "    trying file %s..." "$LINK/$f"
		if $CURL "$LINK/$f" > "$DESTDIR/$f"; then
			failure=$(xml2tsv < "$DESTDIR/$f" 2>/dev/null | \
				grep -Eaic "/html/body/div/div/div[[:blank:]]+class=error[[:blank:]]+Not found")
			echo "$failure"
		else
			# The transfer itself failed; do not keep an empty file.
			failure=1
		fi
		if [ "$failure" != 0 ]; then
			rm -f -- "$DESTDIR/$f"
			printf "[FAILED]\n"
		else
			printf "[OK]\n"
		fi
		# Be gentle with the remote server.
		sleep 1
	done
}

PROTO=${URLBASE%%://*}
DIRBASE="$PROTO/${URLBASE##[a-z]*://}"
echo "proto: $PROTO"
echo "dirbase: $DIRBASE"

# SIGKILL cannot be trapped, so only EXIT, INT and TERM are handled. The
# signal handlers just exit with the conventional 128+N status; the EXIT
# handler then performs the cleanup exactly once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Fetch the index page once; get_repos parses this cached copy.
$CURL "${URLBASE}" | xml2tsv > "$TMPFILE"

get_repos

if [ -z "$repos" ]; then
	echo "parse_cgit: no repositories found at $URLBASE" >&2
fi

# Word splitting of $repos is intentional: one "link|name" word per repository.
for r in $repos; do
	link=${r%%|*}
	name=${r#*|}
	printf "Retrieving repo %s...\n" "$name"
	baselink="${URLBASE}${link}"
	REPODIR="$DEST/$DIRBASE/$link/"
	if ! mkdir -p -- "$REPODIR"; then
		echo "parse_cgit: cannot create $REPODIR" >&2
		continue
	fi
	get_descr "$link" "$baselink" > "${REPODIR}/DESCR"
	## Get READMEs
	get_readmes "$baselink/$SUBPATH" "$REPODIR"
done

### the readme file is at REPLINK/plain/README
### if not found, look for "/html/body/div/div/div  class=error     Not found"
