/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* 
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */


/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 * 
 */

#include "nsExpatTokenizer.h"
#include "nsScanner.h"
#include "nsDTDUtils.h"
#include "nsParserError.h"
#include "nsIParser.h"
#include "prlog.h"

#include "prmem.h"
#include "nsIUnicharInputStream.h"
#ifdef NECKO
#include "nsNeckoUtil.h"
#else
#include "nsINetService.h"
#endif
#include "nsIServiceManager.h"

 /************************************************************************
  And now for the main class -- nsExpatTokenizer...
 ************************************************************************/

// Interface IDs compared against in nsExpatTokenizer::QueryInterface().
static NS_DEFINE_IID(kISupportsIID,       NS_ISUPPORTS_IID);                 
static NS_DEFINE_IID(kITokenizerIID,      NS_ITOKENIZER_IID);
static NS_DEFINE_IID(kHTMLTokenizerIID,   NS_HTMLTOKENIZER_IID);
static NS_DEFINE_IID(kClassIID,           NS_EXPATTOKENIZER_IID);
#ifndef NECKO
// Only needed by the pre-Necko branch of OpenInputStream().
static NS_DEFINE_IID(kNetServiceCID,      NS_NETSERVICE_CID);
static NS_DEFINE_IID(kINetServiceIID,     NS_INETSERVICE_IID);
#endif


// File-scope state read by the static expat callbacks below, which have no
// access to a tokenizer instance.  gTokenRecycler is assigned in the
// constructor; gTokenDeque and gExpatParser are assigned in ConsumeToken()
// just before parsing.  Being file-level statics, they are shared by every
// nsExpatTokenizer instance in the process.
static CTokenRecycler* gTokenRecycler=0;
static nsDeque* gTokenDeque=0;
static XML_Parser gExpatParser=0;

/**
 *  COM-style interface discovery.  Hands back this object cast to the
 *  requested interface and takes a reference on success.
 *
 *  @update   gess 4/8/98
 *  @param    aIID          id of the interface being asked for
 *  @param    aInstancePtr  receives the interface pointer (set to 0 when
 *                          the interface is not supported)
 *  @return   NS_OK, NS_NOINTERFACE, or NS_ERROR_NULL_POINTER when
 *            aInstancePtr itself is null
 */
nsresult nsExpatTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (!aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  // Each branch casts through the exact type the caller asked for so the
  // returned pointer is correctly adjusted for that base class.
  void* matched = 0;
  if (aIID.Equals(kISupportsIID)) {            // nsISupports
    matched = (nsExpatTokenizer*)(this);
  }
  else if (aIID.Equals(kITokenizerIID)) {      // nsITokenizer base class
    matched = (nsITokenizer*)(this);
  }
  else if (aIID.Equals(kHTMLTokenizerIID)) {   // nsHTMLTokenizer base class
    matched = (nsHTMLTokenizer*)(this);
  }
  else if (aIID.Equals(kClassIID)) {           // this concrete class
    matched = (nsExpatTokenizer*)(this);
  }

  *aInstancePtr = matched;
  if (!matched) {
    return NS_NOINTERFACE;
  }

  NS_ADDREF_THIS();
  return NS_OK;
}


/**
 *  Factory function: allocates a new nsExpatTokenizer and returns it through
 *  QueryInterface (using this class's own IID), so the caller receives an
 *  already-addref'd pointer.
 *
 *  @update  gess 4/8/98
 *  @param   aInstancePtrResult  receives the newly created tokenizer
 *  @return  NS_OK on success, NS_ERROR_OUT_OF_MEMORY if allocation fails,
 *           otherwise whatever QueryInterface reports
 */
NS_HTMLPARS nsresult NS_New_Expat_Tokenizer(nsIDTD** aInstancePtrResult) {
  nsExpatTokenizer* it = new nsExpatTokenizer();
  if (it == 0) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  nsresult rv = it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
  if (NS_FAILED(rv)) {
    // QueryInterface only takes a reference on success, so on failure nobody
    // owns the object; destroy it here rather than leaking it.
    delete it;
  }
  return rv;
}


// Macro-generated AddRef()/Release() implementations for nsExpatTokenizer
// (the reference count itself is initialised in the constructor via
// NS_INIT_REFCNT).
NS_IMPL_ADDREF(nsExpatTokenizer)
NS_IMPL_RELEASE(nsExpatTokenizer)

/**
 * Registers this class's static Handle* functions as the callbacks of
 * mExpatParser.  Does nothing when no parser has been created.
 * Note that the default handler is explicitly cleared (nsnull) rather than
 * pointed at HandleDefault.
 *
 * @update  nra 2/24/99
 * @param   none
 * @return  none
 */
void nsExpatTokenizer::SetupExpatCallbacks(void) {
  if (!mExpatParser) {
    return;
  }

  // Document content
  XML_SetElementHandler(mExpatParser, HandleStartElement, HandleEndElement);
  XML_SetCharacterDataHandler(mExpatParser, HandleCharacterData);
  XML_SetProcessingInstructionHandler(mExpatParser, HandleProcessingInstruction);
  XML_SetDefaultHandler(mExpatParser, nsnull);

  // Declarations and external references
  XML_SetUnparsedEntityDeclHandler(mExpatParser, HandleUnparsedEntityDecl);
  XML_SetNotationDeclHandler(mExpatParser, HandleNotationDecl);
  XML_SetExternalEntityRefHandler(mExpatParser, HandleExternalEntityRef);

  // Comments and encodings expat does not know about
  XML_SetCommentHandler(mExpatParser, HandleComment);
  XML_SetUnknownEncodingHandler(mExpatParser, HandleUnknownEncoding, NULL);
}


/**
 *  Default constructor.  Creates an expat parser configured for UTF-16
 *  input and wires up the callbacks.  If the parser cannot be created,
 *  mExpatParser stays null and ParseXMLBuffer() reports NS_ERROR_FAILURE.
 *   
 *  @update  gess 4/9/98
 *  @param   
 *  @return  
 */
nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() {  
  NS_INIT_REFCNT();
  mBytesParsed = 0;
  mSeenError = PR_FALSE;
  // Always start from a known value: the destructor and every parsing entry
  // point test mExpatParser, so it must never be left uninitialised.
  mExpatParser = nsnull;

  nsAutoString buffer("UTF-16");
  const PRUnichar* encoding = buffer.GetUnicode();
  if (encoding) {
    mExpatParser = XML_ParserCreate((const XML_Char*) encoding);
    // The static expat callbacks create their tokens through this global.
    gTokenRecycler=(CTokenRecycler*)GetTokenRecycler();
    if (mExpatParser) {
      // Only touch the parser once we know it was actually created.
#ifdef XML_DTD
      XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS);
#endif
      SetupExpatCallbacks();
    }
  }
}

/**
 *  Destructor.  Frees the expat parser owned by this tokenizer, if any.
 *   
 *  @update  gess 4/9/98
 *  @param   
 *  @return  
 */
nsExpatTokenizer::~nsExpatTokenizer(){
  if (!mExpatParser) {
    return;   // nothing was ever created
  }
  XML_ParserFree(mExpatParser);
  mExpatParser = nsnull;
}


/*******************************************************************
  Here begin the real working methods for the tokenizer.
 *******************************************************************/


/**
 * Extracts the line of source text on which an error occurred and appends it
 * to aError->sourceLine.
 *
 * The buffer is treated as an array of PRUnichar (two bytes each).  The line
 * is the run of characters around the error position that is delimited by
 * '\n' / '\r' characters or by the ends of the buffer; the delimiters
 * themselves are not copied.
 *
 * @param aError         error record whose sourceLine is appended to
 * @param aByteIndex     byte offset of the error inside aSourceBuffer
 * @param aSourceBuffer  the buffer that was handed to expat
 * @param aLength        length of aSourceBuffer in bytes
 */
void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByteIndex, 
                                const char* aSourceBuffer, PRUint32 aLength)
{
  /* Both values describe whole PRUnichars, so they must be even. */
  PR_ASSERT(aByteIndex % 2 == 0 && aLength % 2 == 0);

  if (!aError || !aSourceBuffer) {
    return;
  }

  const PRUnichar* unicodeBuffer = (const PRUnichar*) aSourceBuffer;
  const PRUint32 numCharsInBuffer = aLength / 2;
  const PRUint32 errorIndex = aByteIndex / 2;

  /* Assertions vanish in release builds, so enforce the range here: an index
     beyond the end of the buffer cannot be mapped to any line.  An index equal
     to the length (error at the very end) is still usable. */
  if (errorIndex > numCharsInBuffer) {
    return;
  }

  /* If the error position is itself a line break, report an empty line
     (nothing is appended). */
  if (errorIndex < numCharsInBuffer &&
      ('\n' == unicodeBuffer[errorIndex] || '\r' == unicodeBuffer[errorIndex])) {
    return;
  }

  /* Walk backwards to the first character of the line.  The bound is tested
     before each read, and the character examined is always the one *before*
     lineStart, so a line break is never included -- not even one sitting at
     index 0. */
  PRUint32 lineStart = errorIndex;
  while (lineStart > 0 &&
         '\n' != unicodeBuffer[lineStart - 1] &&
         '\r' != unicodeBuffer[lineStart - 1]) {
    lineStart--;
  }

  /* Walk forwards to one past the last character of the line.  Again the
     bound is tested first, so nothing past the buffer is ever read. */
  PRUint32 lineEnd = errorIndex;
  while (lineEnd < numCharsInBuffer &&
         '\n' != unicodeBuffer[lineEnd] &&
         '\r' != unicodeBuffer[lineEnd]) {
    lineEnd++;
  }

  if (lineEnd > lineStart) {
    aError->sourceLine.Append(&unicodeBuffer[lineStart], lineEnd - lineStart);
  }
}

/* 
 * Called immediately after an error has occurred in expat.  Creates
 * an error token and pushes it onto the token queue.
 *
 */
void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength)
{
  CErrorToken* token= (CErrorToken *) gTokenRecycler->CreateTokenOfType(eToken_error, eHTMLTag_unknown);
  nsParserError *error = new nsParserError;
  PRUint32 byteIndexRelativeToFile = 0;

  if(error){  
    error->code = XML_GetErrorCode(mExpatParser);
    error->lineNumber = XML_GetCurrentLineNumber(mExpatParser);
    error->colNumber = XML_GetCurrentColumnNumber(mExpatParser);  
    error->description = XML_ErrorString(error->code);
    byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);  
    SetErrorContextInfo(error, (byteIndexRelativeToFile - mBytesParsed), aBuffer, aLength);  
    token->SetError(error);

    CToken* theToken = (CToken* )token;
    AddToken(theToken, NS_OK, *gTokenDeque,gTokenRecycler);
  }
}

/**
 * Feeds one chunk of data to expat (as a non-final chunk).
 *
 * @param  aBuffer  data to parse
 * @param  aLength  length of aBuffer in bytes
 * @return NS_OK on success, NS_ERROR_HTMLPARSER_STOPPARSING if expat reported
 *         an error (an error token is queued first), or NS_ERROR_FAILURE when
 *         there is no parser
 */
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength){
  if (!mExpatParser) {
    return NS_ERROR_FAILURE;
  }

  nsresult rv = NS_OK;
  const int parsedOk = XML_Parse(mExpatParser, aBuffer, aLength, PR_FALSE);
  if (!parsedOk) {
    PushXMLErrorToken(aBuffer, aLength);
    rv = NS_ERROR_HTMLPARSER_STOPPARSING;
  }

  // Keep the running total up to date whether or not the chunk parsed;
  // PushXMLErrorToken uses it to turn expat's position into a buffer offset.
  mBytesParsed += aLength;
  return rv;
}


/**
 *  Called repeatedly by the parser.  Instead of tokenizing character by
 *  character, this takes everything the scanner has buffered, hands it to
 *  expat in one go (the expat callbacks produce the tokens), and then empties
 *  the scanner's buffer.
 *  
 *  @update gess 3/25/98
 *  @param  aScanner: see nsScanner.h
 *  @return the error from ParseXMLBuffer if parsing failed; otherwise the
 *          result of aScanner.Eof()
 */
nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner) {
  nsresult rv = NS_OK;

  nsString& scanned = aScanner.GetBuffer();
  const PRInt32 charCount = scanned.Length();
  if (charCount > 0) {
    const PRUnichar* chars = scanned.GetUnicode();
    if (chars) {
      // expat wants a byte count; each PRUnichar is two bytes.
      const PRUint32 byteCount = scanned.Length() * 2;

      // Publish this instance's queue and parser for the static callbacks.
      gTokenDeque = &mTokenDeque;
      gExpatParser = mExpatParser;

      rv = ParseXMLBuffer((const char *) chars, byteCount);
    }
    scanned.Truncate(0);
  }

  return (NS_OK == rv) ? aScanner.Eof() : rv;
}


/**
 * Intentionally empty in this tokenizer: the deque passed in is left
 * untouched.
 *
 * @update	gess12/29/98
 * @param   aDeque  token deque (unused)
 * @return  none
 */
void nsExpatTokenizer::FrontloadMisplacedContent(nsDeque& aDeque){
}

/***************************************/
/* Expat Callback Functions start here */
/***************************************/

void nsExpatTokenizer::HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts){
  CToken* theToken=gTokenRecycler->CreateTokenOfType(eToken_start,eHTMLTag_unknown);
  if(theToken) {
    nsString& theString=theToken->GetStringValueXXX();
    theString.SetString((PRUnichar *) name);
    AddToken(theToken,NS_OK,*gTokenDeque,gTokenRecycler);
    int theAttrCount=0;
    while(*atts){
      theAttrCount++;
      CAttributeToken* theAttrToken= (CAttributeToken*)gTokenRecycler->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown);
      if(theAttrToken){
        nsString& theKey=theAttrToken->GetKey();
        theKey.SetString((PRUnichar *) (*atts++));
        nsString& theValue=theAttrToken->GetStringValueXXX();
        theValue.SetString((PRUnichar *) (*atts++));
      }
      CToken* theTok=(CToken*)theAttrToken;
      AddToken(theTok,NS_OK,*gTokenDeque,gTokenRecycler);
    }
    theToken->SetAttributeCount(theAttrCount);
  }
  else{
    //THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
  }
}

void nsExpatTokenizer::HandleEndElement(void *userData, const XML_Char *name) {
  CToken* theToken=gTokenRecycler->CreateTokenOfType(eToken_end,eHTMLTag_unknown);
  if(theToken) {
    nsString& theString=theToken->GetStringValueXXX();
    theString.SetString((PRUnichar *) name);
    AddToken(theToken,NS_OK,*gTokenDeque,gTokenRecycler);
  }
  else{
    //THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
  }
}

void nsExpatTokenizer::HandleCharacterData(void *userData, const XML_Char *s, int len) { 
  CCDATASectionToken* currentCDataToken = (CCDATASectionToken*) userData;
  PRBool StartOfCDataSection = (!currentCDataToken && len == 0);
  PRBool EndOfCDataSection = (currentCDataToken && len == 0);

  // Either create a new token (if not currently within a CDATA section) or add the
  // current string from expat to the current CDATA token.

  if (StartOfCDataSection) {
    // Set up state so that we know that we are within a CDATA section.
    currentCDataToken = (CCDATASectionToken*) gTokenRecycler->CreateTokenOfType(eToken_cdatasection,eHTMLTag_unknown);
    XML_SetUserData(gExpatParser, (void *) currentCDataToken);
  }
  else if (EndOfCDataSection) {
    // We've reached the end of the current CDATA section. Push the current CDATA token
    // onto the token queue and reset state to being outside a CDATA section.
    CToken* tempCDATAToken = (CToken*) currentCDataToken;
    AddToken(tempCDATAToken,NS_OK,*gTokenDeque,gTokenRecycler);
    currentCDataToken = 0;
    XML_SetUserData(gExpatParser, 0);
  }
  else if (currentCDataToken) {
    // While there exists a current CDATA token, keep appending all strings from expat into it.
    nsString& theString = currentCDataToken->GetStringValueXXX();
    theString.Append((PRUnichar *) s,len);
  }
  else {
    CToken* newToken = 0;

    switch(s[0]){
      case kNewLine:
      case CR:
        newToken=gTokenRecycler->CreateTokenOfType(eToken_newline,eHTMLTag_unknown); break;
      case kSpace:
      case kTab:
        newToken=gTokenRecycler->CreateTokenOfType(eToken_whitespace,eHTMLTag_unknown); break;
      default:
        newToken=gTokenRecycler->CreateTokenOfType(eToken_text,eHTMLTag_unknown);
    }
    
    if(newToken) {
      if ((s[0] != kNewLine) && (s[0] != CR)) {
        nsString& theString=newToken->GetStringValueXXX();
        theString.Append((PRUnichar *) s,len);
      }
      AddToken(newToken,NS_OK,*gTokenDeque,gTokenRecycler);
    }
    else {
      //THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
    }
  }  
}

void nsExpatTokenizer::HandleComment(void *userData, const XML_Char *name) {
  CToken* theToken=gTokenRecycler->CreateTokenOfType(eToken_comment, eHTMLTag_unknown);
  if(theToken) {
    nsString& theString=theToken->GetStringValueXXX();
    theString.SetString((PRUnichar *) name);
    AddToken(theToken,NS_OK,*gTokenDeque,gTokenRecycler);
  }
  else{
    //THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
  }
}

void nsExpatTokenizer::HandleProcessingInstruction(void *userData, const XML_Char *target, const XML_Char *data){
  CToken* theToken=gTokenRecycler->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown);
  if(theToken) {
    nsString& theString=theToken->GetStringValueXXX();
    theString.Append("<?");
    theString.Append((PRUnichar *) target);
    if(data) {
      theString.Append(" ");
      theString.Append((PRUnichar *) data);
    }
    theString.Append("?>");
    AddToken(theToken,NS_OK,*gTokenDeque,gTokenRecycler);
  }
  else{
    //THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
  }
}

/**
 * Default-data callback; currently a no-op.  Note that SetupExpatCallbacks()
 * passes nsnull to XML_SetDefaultHandler, so this function is not registered
 * with the parser there.
 */
void nsExpatTokenizer::HandleDefault(void *userData, const XML_Char *s, int len) {
//  NS_NOTYETIMPLEMENTED("Error: nsExpatTokenizer::HandleDefault() not yet implemented.");
}

/**
 * expat callback for an unparsed entity declaration.  Not implemented: all
 * arguments are ignored and no token is produced; the only action is the
 * NS_NOTYETIMPLEMENTED notice.
 */
void nsExpatTokenizer::HandleUnparsedEntityDecl(void *userData, 
                                          const XML_Char *entityName, 
                                          const XML_Char *base, 
                                          const XML_Char *systemId, 
                                          const XML_Char *publicId,
                                          const XML_Char *notationName) {
  NS_NOTYETIMPLEMENTED("Error: nsExpatTokenizer::HandleUnparsedEntityDecl() not yet implemented.");
}

/**
 * Opens an input stream for the given URL string.
 *
 * Without NECKO the URL is created and opened through the net service;
 * with NECKO it goes through NS_NewURI / NS_OpenURI.
 *
 * @param  aURLStr  URL to open
 * @param  in       receives the opened stream on success
 * @return result of the last URL/stream operation, or the failure that
 *         prevented it from being attempted
 */
nsresult
nsExpatTokenizer::OpenInputStream(nsString2& aURLStr, nsIInputStream*& in) 
{
  nsresult ret;
#ifndef NECKO
  nsINetService* pNetService = nsnull;

  ret = nsServiceManager::GetService(kNetServiceCID,
                                     kINetServiceIID, (nsISupports**) &pNetService);
  if (NS_FAILED(ret) || !pNetService) {
    // Without the service there is nothing to call into.
    return NS_FAILED(ret) ? ret : NS_ERROR_FAILURE;
  }

  /* get the url
   */
  nsIURI    *url = nsnull;

  ret = pNetService->CreateURL(&url, aURLStr);
  if (NS_FAILED(ret)) {
#ifdef NS_DEBUG
    char *s = aURLStr.ToNewCString();
    printf("\n** cannot create URL for %s\n", s?s:"null");
    // NOTE(review): assumes ToNewCString() allocates with new[] -- confirm.
    delete [] s;
#endif
    nsServiceManager::ReleaseService(kNetServiceCID, pNetService);
    return ret;
  }
  //
  ret = pNetService->OpenBlockingStream(url, nsnull, &in);
  NS_RELEASE(url);
  // Balance the GetService() above on every path.
  nsServiceManager::ReleaseService(kNetServiceCID, pNetService);
#ifdef NS_DEBUG
  if (NS_FAILED(ret) || !in) {
    char *s = aURLStr.ToNewCString();
    printf("\n** cannot open stream: %s\n", s?s:"null");
    // NOTE(review): assumes ToNewCString() allocates with new[] -- confirm.
    delete [] s;
  }
#endif

#else // NECKO
  nsIURI* uri;

  ret = NS_NewURI(&uri, aURLStr);
  if (NS_FAILED(ret)) return ret;

  ret = NS_OpenURI(&in, uri, nsnull);   // XXX need to pass the document's nsILoadGroup here!
  NS_RELEASE(uri);
#endif // NECKO
  return ret;
}

/**
 * Reads an entire input stream, decoded as UTF-8, into a freshly allocated
 * PRUnichar buffer.
 *
 * @param  in      stream to read
 * @param  uniBuf  receives the buffer (allocated with PR_Malloc; the caller
 *                 frees it).  Null if nothing could be allocated.
 * @param  retLen  receives the number of PRUnichars stored in uniBuf
 * @return the result that ended the read loop.  Reading continues for as
 *         long as Read() returns NS_OK and delivers data, so callers should
 *         judge success by uniBuf / retLen rather than by this value.
 */
nsresult nsExpatTokenizer::LoadStream(nsIInputStream* in, 
                                      PRUnichar* &uniBuf, PRUint32 &retLen)
{
  uniBuf = nsnull;
  retLen = 0;
  if (!in) {
    return NS_ERROR_NULL_POINTER;
  }

  const PRUint32 kChunkChars = 1024;   // PRUnichars requested per Read()
  PRUint32 capacity = kChunkChars;     // buffer capacity, in PRUnichars

  nsIUnicharInputStream *uniIn = nsnull;
  // NOTE(review): who owns this string after NS_NewConverterStream() is not
  // visible from here -- confirm whether it must be freed by the caller.
  nsString *utf8 = new nsString("UTF-8");

  nsresult res = NS_NewConverterStream(&uniIn,
                                       nsnull,
                                       in,
                                       kChunkChars,
                                       utf8);
  if (NS_FAILED(res) || !uniIn) {
    return NS_FAILED(res) ? res : NS_ERROR_FAILURE;
  }

  uniBuf = (PRUnichar *) PR_Malloc(capacity * sizeof(PRUnichar));
  if (!uniBuf) {
    NS_RELEASE(uniIn);
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Invariant at the top of each iteration: retLen + kChunkChars <= capacity,
  // so a full-size read at offset retLen always fits.
  PRUint32 readCount = 0;
  while (NS_OK == (res=uniIn->Read(uniBuf, retLen, kChunkChars, &readCount))) {
    if (0 == readCount) {
      break;   // no progress; don't spin forever
    }
    retLen += readCount;

    if (retLen + kChunkChars > capacity) {
      // Doubling always restores the invariant because capacity is never
      // smaller than kChunkChars.
      const PRUint32 newCapacity = capacity * 2;
      PRUnichar* grown = (PRUnichar *) PR_Realloc(uniBuf, newCapacity * sizeof(PRUnichar));
      if (!grown) {
        // Keep what has been read so far; the old buffer is still valid.
        res = NS_ERROR_OUT_OF_MEMORY;
        break;
      }
      uniBuf = grown;
      capacity = newCapacity;
    }
  }/* while */

  NS_RELEASE(uniIn);
  return res;
}
/**
 * expat callback for a notation declaration.  Not implemented: all arguments
 * are ignored and no token is produced; the only action is the
 * NS_NOTYETIMPLEMENTED notice.
 */
void nsExpatTokenizer::HandleNotationDecl(void *userData,
                                    const XML_Char *notationName,
                                    const XML_Char *base,
                                    const XML_Char *systemId,
                                    const XML_Char *publicId){
  NS_NOTYETIMPLEMENTED("Error: nsExpatTokenizer::HandleNotationDecl() not yet implemented.");
}

/**
 * expat callback for a reference to an external entity.
 *
 * With XML_DTD defined, the entity named by systemId is fetched, loaded into
 * memory and parsed with a child parser created from `parser`.  Without
 * XML_DTD this is not implemented.
 *
 * @param  parser           the parser that hit the reference
 * @param  openEntityNames  entity context supplied by expat (currently not
 *                          forwarded to the child parser)
 * @param  base             unused
 * @param  systemId         system identifier, used as the URL to load
 * @param  publicId         unused
 * @return the result of XML_Parse on the entity, or 0 when the entity could
 *         not be loaded or parsed at all
 */
int nsExpatTokenizer::HandleExternalEntityRef(XML_Parser parser,
                                         const XML_Char *openEntityNames,
                                         const XML_Char *base,
                                         const XML_Char *systemId,
                                         const XML_Char *publicId){
#ifdef XML_DTD
  // Failure until the entity has actually been parsed.
  int result = 0;

  /* create an extent parser
   */
  nsAutoString buffer("UTF-16");
  const PRUnichar* encoding = buffer.GetUnicode();
  // NOTE(review): the context argument is passed as 0 rather than
  // openEntityNames -- confirm this is intended.
  XML_Parser entParser = 
    XML_ExternalEntityParserCreate(parser, 
                                   0, 
                                   encoding?(const XML_Char*) encoding:0);

  if (entParser) {
    nsIInputStream *in = nsnull;
    nsString2       s((PRUnichar *)systemId);
    nsresult        res = OpenInputStream(s, in);

    if (!NS_FAILED(res) && in) {
      PRUint32        retLen = 0;
      PRUnichar      *uniBuf = nsnull;

      // LoadStream's return value is deliberately not consulted: its read
      // loop only ends on a non-NS_OK result, so the buffer pointer is the
      // reliable indicator of whether anything was loaded.
      LoadStream(in, uniBuf, retLen);

      if (uniBuf) {
        result = XML_Parse(entParser, (char *)uniBuf,  retLen*sizeof(PRUnichar), 1);
      }
      PR_FREEIF(uniBuf);
    }

    // The stream may be null if the open failed; only release a real one.
    if (in) {
      NS_RELEASE(in);
    }
    XML_ParserFree(entParser);
  }

#else /* ! XML_DTD */

  NS_NOTYETIMPLEMENTED("Error: nsExpatTokenizer::HandleExternalEntityRef() not yet implemented.");
  int result=0;

#endif /* XML_DTD */

  return result;
}

/**
 * expat callback for an encoding name it does not recognise.  Not
 * implemented: `info` is left untouched and 0 is always returned.
 */
int nsExpatTokenizer::HandleUnknownEncoding(void *encodingHandlerData,
                                       const XML_Char *name,
                                       XML_Encoding *info) {
  NS_NOTYETIMPLEMENTED("Error: nsExpatTokenizer::HandleUnknownEncoding() not yet implemented.");
  return 0;
}

